{"id":"https://openalex.org/W1975191211","doi":"https://doi.org/10.3115/1119250.1119251","title":"Unsupervised training for overlapping ambiguity resolution in Chinese word segmentation","display_name":"Unsupervised training for overlapping ambiguity resolution in Chinese word segmentation","publication_year":2003,"publication_date":"2003-01-01","ids":{"openalex":"https://openalex.org/W1975191211","doi":"https://doi.org/10.3115/1119250.1119251","mag":"1975191211"},"language":"en","primary_location":{"id":"doi:10.3115/1119250.1119251","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1119250.1119251","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1119250.1119251","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the second SIGHAN workshop on Chinese language processing  -","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1119250.1119251","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100399461","display_name":"Mu Li","orcid":"https://orcid.org/0000-0002-4433-2301"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mu Li","raw_affiliation_strings":["Microsoft Research, Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114910293","display_name":"Jianfeng Gao","orcid":"https://orcid.org/0000-0002-5702-6143"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfeng Gao","raw_affiliation_strings":["Microsoft Research, Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066941389","display_name":"Changning Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changning Huang","raw_affiliation_strings":["Microsoft Research, Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100462032","display_name":"Jian\u2010Feng Li","orcid":"https://orcid.org/0000-0003-1598-6856"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfeng Li","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China",", University of Science and Technology of China, Hefei, China#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]},{"raw_affiliation_string":", University of Science and Technology of China, Hefei, China#TAB#","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100399461"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":3.198,"has_fulltext":true,"cited_by_count":31,"citation_normalized_percentile":{"value":0.91984616,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"17","issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8225351572036743},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7451881766319275},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7065935134887695},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6312269568443298},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6116205453872681},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6088055372238159},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5280483365058899},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5188530683517456},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5178589820861816},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4802035093307495},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4782349467277527},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4562950134277344},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.45076829195022583},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39364951848983765},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33401429653167725},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.06300747394561768}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8225351572036743},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7451881766319275},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7065935134887695},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6312269568443298},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6116205453872681},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6088055372238159},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5280483365058899},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5188530683517456},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5178589820861816},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4802035093307495},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4782349467277527},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4562950134277344},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.45076829195022583},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39364951848983765},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33401429653167725},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06300747394561768},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3115/1119250.1119251","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1119250.1119251","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1119250.1119251","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the second SIGHAN workshop on Chinese language processing  -","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.3115/1119250.1119251","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1119250.1119251","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1119250.1119251","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the second SIGHAN workshop on Chinese language processing  -","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7799999713897705}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1975191211.pdf","grobid_xml":"https://content.openalex.org/works/W1975191211.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W954474456","https://openalex.org/W1490150939","https://openalex.org/W1548836941","https://openalex.org/W1618059599","https://openalex.org/W1798908922","https://openalex.org/W1830858031","https://openalex.org/W1840762595","https://openalex.org/W1914695961","https://openalex.org/W1954848232","https://openalex.org/W1974967573","https://openalex.org/W2249186037","https://openalex.org/W2310438174","https://openalex.org/W2383967577","https://openalex.org/W2403515192","https://openalex.org/W2952828185","https://openalex.org/W3016210511","https://openalex.org/W3017143921","https://openalex.org/W3142057967","https://openalex.org/W4250516003","https://openalex.org/W4255764218"],"related_works":["https://openalex.org/W2103942400","https://openalex.org/W1999699871","https://openalex.org/W4225124612","https://openalex.org/W2043806667","https://openalex.org/W2021633306","https://openalex.org/W2006801911","https://openalex.org/W2033669961","https://openalex.org/W2971899271","https://openalex.org/W1972167985","https://openalex.org/W2350644419"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"an":[3,17,29,71],"unsupervised":[4],"training":[5,80],"approach":[6,54,69],"to":[7],"resolving":[8],"overlapping":[9],"ambiguities":[10],"in":[11,37],"Chinese":[12,31],"word":[13],"segmentation.":[14],"We":[15],"present":[16],"ensemble":[18],"of":[19,45,52,73],"adapted":[20],"Na\u00efve":[21],"Bayesian":[22],"classifiers":[23,35],"that":[24,38,66],"can":[25],"be":[26],"trained":[27],"using":[28],"unlabelled":[30],"text":[32],"corpus.":[33],"These":[34],"differ":[36],"they":[39],"use":[40],"context":[41],"words":[42],"within":[43],"windows":[44],"different":[46],"sizes":[47],"as":[48],"features.":[49],"The":[50],"performance":[51],"our":[53],"is":[55],"evaluated":[56],"on":[57],"a":[58],"manually":[59],"annotated":[60],"test":[61],"set.":[62],"Experimental":[63],"results":[64],"show":[65],"the":[67,76],"proposed":[68],"achieves":[70],"accuracy":[72],"94.3%,":[74],"rivaling":[75],"rule-based":[77],"and":[78],"supervised":[79],"methods.":[81]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
