{"id":"https://openalex.org/W2122228338","doi":"https://doi.org/10.3115/1220175.1220260","title":"Contextual dependencies in unsupervised word segmentation","display_name":"Contextual dependencies in unsupervised word segmentation","publication_year":2006,"publication_date":"2006-01-01","ids":{"openalex":"https://openalex.org/W2122228338","doi":"https://doi.org/10.3115/1220175.1220260","mag":"2122228338"},"language":"en","primary_location":{"id":"doi:10.3115/1220175.1220260","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1220175.1220260","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=1220260&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the ACL  - ACL '06","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=1220260&type=pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075564798","display_name":"Sharon Goldwater","orcid":"https://orcid.org/0000-0002-7298-0947"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sharon Goldwater","raw_affiliation_strings":["Brown University, Providence, RI","Brown University, Providence, RI,"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI","institution_ids":["https://openalex.org/I27804330"]},{"raw_affiliation_string":"Brown University, Providence, RI,","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077079119","display_name":"Thomas L. Griffiths","orcid":"https://orcid.org/0000-0002-5138-7255"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Thomas L. Griffiths","raw_affiliation_strings":["Brown University, Providence, RI","Brown University, Providence, RI,"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI","institution_ids":["https://openalex.org/I27804330"]},{"raw_affiliation_string":"Brown University, Providence, RI,","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034461489","display_name":"Mark Johnson","orcid":"https://orcid.org/0000-0003-4809-8441"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mark Johnson","raw_affiliation_strings":["Brown University, Providence, RI","Brown University, Providence, RI,"],"affiliations":[{"raw_affiliation_string":"Brown University, Providence, RI","institution_ids":["https://openalex.org/I27804330"]},{"raw_affiliation_string":"Brown University, Providence, RI,","institution_ids":["https://openalex.org/I27804330"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075564798"],"corresponding_institution_ids":["https://openalex.org/I27804330"],"apc_list":null,"apc_paid":null,"fwci":29.8055,"has_fulltext":true,"cited_by_count":312,"citation_normalized_percentile":{"value":0.99737681,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"673","last_page":"680"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.9878766536712646},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8051499724388123},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7458579540252686},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6682798862457275},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.6638283729553223},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6186724305152893},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5700903534889221},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5663514137268066},{"id":"https://openalex.org/keywords/market-segmentation","display_name":"Market segmentation","score":0.5034303069114685},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.4469258189201355},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35960108041763306},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3504287600517273},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.0789271891117096},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0766567587852478}],"concepts":[{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.9878766536712646},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8051499724388123},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7458579540252686},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6682798862457275},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.6638283729553223},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6186724305152893},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5700903534889221},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5663514137268066},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.5034303069114685},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.4469258189201355},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35960108041763306},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3504287600517273},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.0789271891117096},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0766567587852478},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.3115/1220175.1220260","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1220175.1220260","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=1220260&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the ACL  - ACL '06","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:publications/bbfab021-df70-41ba-8d06-43273aeec9de","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11820/bbfab021-df70-41ba-8d06-43273aeec9de","pdf_url":"https://www.pure.ed.ac.uk/ws/files/7693581/P06_1085.pdf","source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.143.7206","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.143.7206","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/P/P06/P06-1085.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.184.7960","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.184.7960","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://homepages.inf.ed.ac.uk/sgwater/papers/acl06.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.94.7630","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.94.7630","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cocosci.berkeley.edu/tom/papers/wordseg1.pdf","raw_type":"text"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/bbfab021-df70-41ba-8d06-43273aeec9de","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/bbfab021-df70-41ba-8d06-43273aeec9de","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Goldwater, S, Griffiths, T L & Johnson, M 2006, Contextual Dependencies in Unsupervised Word Segmentation. in Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics. Sydney, Australia, pp. 673-680. https://doi.org/10.3115/1220175.1220260","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"doi:10.3115/1220175.1220260","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1220175.1220260","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=1220260&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the ACL  - ACL '06","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1406903026","display_name":"Learning and Action in the Face of Uncertainty: Cognitive,  Computational and Statistical Approaches","funder_award_id":"9870676","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6592538899","display_name":null,"funder_award_id":"DC000314","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G8164471825","display_name":null,"funder_award_id":"IGERT","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2122228338.pdf","grobid_xml":"https://content.openalex.org/works/W2122228338.grobid-xml"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W263845233","https://openalex.org/W1526685788","https://openalex.org/W1967687583","https://openalex.org/W1995991622","https://openalex.org/W2005902041","https://openalex.org/W2074546930","https://openalex.org/W2115867364","https://openalex.org/W2130416410","https://openalex.org/W2142263282","https://openalex.org/W2154756108","https://openalex.org/W2158266063","https://openalex.org/W2159399018","https://openalex.org/W2161952424","https://openalex.org/W2882319491","https://openalex.org/W2952343510","https://openalex.org/W3136512150","https://openalex.org/W4251043592","https://openalex.org/W4251644969"],"related_works":["https://openalex.org/W2197825247","https://openalex.org/W2126405470","https://openalex.org/W4388551919","https://openalex.org/W111205082","https://openalex.org/W2053243610","https://openalex.org/W2136165248","https://openalex.org/W1967541735","https://openalex.org/W1831215082","https://openalex.org/W2979502264","https://openalex.org/W2266240920"],"abstract_inverted_index":{"Developing":[0],"better":[1],"methods":[2,36],"for":[3,11,65],"segmenting":[4],"continuous":[5],"text":[6],"into":[7],"words":[8],"is":[9],"important":[10],"improving":[12],"the":[13,52,60],"processing":[14],"of":[15,43,62],"Asian":[16],"languages,":[17],"and":[18,40],"may":[19],"shed":[20],"light":[21],"on":[22,77],"how":[23],"humans":[24],"learn":[25],"to":[26],"segment":[27],"speech.":[28],"We":[29,68],"propose":[30],"two":[31],"new":[32],"Bayesian":[33],"word":[34,44,66],"segmentation":[35],"that":[37,71],"assume":[38],"unigram":[39,53],"bigram":[41,48],"models":[42,74],"dependencies":[45,64],"respectively.":[46],"The":[47],"model":[49,54],"greatly":[50],"outperforms":[51],"(and":[55],"previous":[56,72],"probabilistic":[57,73],"models),":[58],"demonstrating":[59],"importance":[61],"such":[63],"segmentation.":[67],"also":[69],"show":[70],"rely":[75],"crucially":[76],"sub-optimal":[78],"search":[79],"procedures.":[80]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":17},{"year":2016,"cited_by_count":11},{"year":2015,"cited_by_count":11},{"year":2014,"cited_by_count":31},{"year":2013,"cited_by_count":28},{"year":2012,"cited_by_count":25}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
