{"id":"https://openalex.org/W1971678616","doi":"https://doi.org/10.1145/1781134.1781135","title":"A Unified Character-Based Tagging Framework for Chinese Word Segmentation","display_name":"A Unified Character-Based Tagging Framework for Chinese Word Segmentation","publication_year":2010,"publication_date":"2010-06-01","ids":{"openalex":"https://openalex.org/W1971678616","doi":"https://doi.org/10.1145/1781134.1781135","mag":"1971678616"},"language":"en","primary_location":{"id":"doi:10.1145/1781134.1781135","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1781134.1781135","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100457332","display_name":"Hai Zhao","orcid":"https://orcid.org/0000-0002-3392-2584"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hai Zhao","raw_affiliation_strings":["Shanghai Jiao Tong University &amp; Soochow University","Shanghai Jiao Tong University & Soochow University#TAB#"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University &amp; Soochow University","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Shanghai Jiao Tong University & Soochow University#TAB#","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066941389","display_name":"Changning Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang-Ning Huang","raw_affiliation_strings":["Microsoft Research Asia","Microsoft Research Asia,"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia,","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100399461","display_name":"Mu Li","orcid":"https://orcid.org/0000-0002-4433-2301"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mu Li","raw_affiliation_strings":["Microsoft Research Asia","Microsoft Research Asia,"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research Asia,","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040440605","display_name":"Bao\u2010Liang Lu","orcid":"https://orcid.org/0000-0001-8359-0058"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bao-Liang Lu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100457332"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":8.1188,"has_fulltext":false,"cited_by_count":80,"citation_normalized_percentile":{"value":0.97358356,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"9","issue":"2","first_page":"1","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11587","display_name":"Second Language Acquisition and Learning","score":0.9674999713897705,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8419442176818848},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6706721186637878},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6398090124130249},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6077479720115662},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6035259366035461},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5681518912315369},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5609959363937378},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5601135492324829},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5596463680267334},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.5364082455635071},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4326215982437134},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32223495841026306},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.16333723068237305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8419442176818848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6706721186637878},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6398090124130249},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6077479720115662},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6035259366035461},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5681518912315369},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5609959363937378},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5601135492324829},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5596463680267334},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.5364082455635071},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4326215982437134},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32223495841026306},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.16333723068237305},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1781134.1781135","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1781134.1781135","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2783483863","display_name":null,"funder_award_id":"2008AA02Z315","funder_id":"https://openalex.org/F4320321540","funder_display_name":"Ministry of Science and Technology of the People's Republic of China"},{"id":"https://openalex.org/G7900743146","display_name":null,"funder_award_id":"6090311990820018607730902009CB320901","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320308943","display_name":"Microsoft Research","ror":"https://ror.org/00d0nc645"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321540","display_name":"Ministry of Science and Technology of the People's Republic of China","ror":"https://ror.org/027s68j25"},{"id":"https://openalex.org/F4320324720","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W25062297","https://openalex.org/W105490757","https://openalex.org/W121758096","https://openalex.org/W124342275","https://openalex.org/W1486819475","https://openalex.org/W1558333962","https://openalex.org/W1575907248","https://openalex.org/W1773803948","https://openalex.org/W1967084801","https://openalex.org/W1975191211","https://openalex.org/W1979145089","https://openalex.org/W1979557360","https://openalex.org/W1993120055","https://openalex.org/W2033295622","https://openalex.org/W2035304092","https://openalex.org/W2036516910","https://openalex.org/W2036701977","https://openalex.org/W2046199218","https://openalex.org/W2095624699","https://openalex.org/W2102667697","https://openalex.org/W2115629999","https://openalex.org/W2116317530","https://openalex.org/W2117400858","https://openalex.org/W2117689109","https://openalex.org/W2118563017","https://openalex.org/W2140016149","https://openalex.org/W2142263282","https://openalex.org/W2147880316","https://openalex.org/W2156515921","https://openalex.org/W2158049734","https://openalex.org/W2163377725","https://openalex.org/W2217873082","https://openalex.org/W2250667930","https://openalex.org/W2251726058","https://openalex.org/W2252066972","https://openalex.org/W2252083035","https://openalex.org/W2467575451","https://openalex.org/W2483327705","https://openalex.org/W2560674852","https://openalex.org/W2598654328","https://openalex.org/W2751555667","https://openalex.org/W2785519942","https://openalex.org/W2785522575","https://openalex.org/W2786124691","https://openalex.org/W2930957955","https://openalex.org/W2996160789","https://openalex.org/W2998314454","https://openalex.org/W3010392501","https://openalex.org/W3145501851","https://openalex.org/W4285719527","https://openalex.org/W4394729942","https://openalex.org/W4402683075","https://openalex.org/W6683955732"],"related_works":["https://openalex.org/W2978383222","https://openalex.org/W2172629291","https://openalex.org/W2380773642","https://openalex.org/W2384559435","https://openalex.org/W2337707338","https://openalex.org/W2393940967","https://openalex.org/W2058548953","https://openalex.org/W2785359773","https://openalex.org/W2385598138","https://openalex.org/W2346578824"],"abstract_inverted_index":{"Chinese":[0,8,39,256],"word":[1,23,40,257],"segmentation":[2,29,41,232,258],"is":[3,13,21,77,95,130,142,150,165,214,219,223],"an":[4,208,228],"active":[5],"area":[6],"in":[7,24,62,79,83,102,114,190,249],"language":[9],"processing":[10],"though":[11],"it":[12],"suffering":[14],"from":[15,68,99,196],"the":[16,85,91,119,191,241],"argument":[17],"about":[18],"what":[19],"precisely":[20],"a":[22,43,53,58,80,131,146,162,172,183,197],"Chinese.":[25],"Based":[26,239],"on":[27,89,240],"corpus-based":[28],"standard,":[30],"we":[31,37],"launched":[32],"this":[33,63],"study.":[34],"In":[35,65,200],"detail,":[36],"regard":[38],"as":[42,135,231],"character-based":[44,59],"tagging":[45,60],"problem.":[46],"We":[47,126],"show":[48,127,170],"that":[49,103,128,171,235],"there":[50,129],"has":[51],"been":[52],"potent":[54],"trend":[55],"of":[56,118,161,193,253],"using":[57],"approach":[61],"field.":[64],"particular,":[66],"learning":[67,174,194,210],"segmented":[69,156],"corpus":[70],"with":[71,176],"or":[72],"without":[73],"additional":[74,203],"linguistic":[75,159,204],"resources":[76,205],"treated":[78],"unified":[81],"way":[82],"which":[84,149],"only":[86,124,195],"difference":[87,134],"depends":[88],"how":[90],"feature":[92,105,121,180],"template":[93,106,122],"set":[94,110,164,185],"selected.":[96,140],"It":[97],"differs":[98],"existing":[100,237],"work":[101],"both":[104],"selection":[107,111],"and":[108,182,216],"tag":[109,137,163],"are":[112,139,206],"considered":[113],"our":[115,244],"approach,":[116,243],"instead":[117],"previous":[120],"focus":[123],"technique.":[125],"significant":[132],"performance":[133,189,248],"different":[136],"sets":[138],"This":[141],"especially":[143],"applied":[144],"to":[145,226],"six-tag":[147,184],"set,":[148],"good":[151],"enough":[152],"for":[153],"most":[154],"current":[155],"corpora.":[157],"The":[158],"meaning":[160],"also":[166,224],"discussed.":[167],"Our":[168],"results":[169],"simple":[173],"system":[175,245],"six":[177],"n":[178],"-gram":[179],"templates":[181],"can":[186],"obtain":[187],"competitive":[188],"cases":[192,201],"training":[198],"corpus.":[199],"when":[202],"available,":[207],"ensemble":[209],"technique,":[211],"assistant":[212],"segmenter,":[213],"proposed":[215,242],"its":[217],"effectiveness":[218],"verified.":[220],"Assistant":[221],"segmenter":[222],"proven":[225],"be":[227],"effective":[229],"method":[230],"standard":[233],"adaptation":[234],"outperforms":[236],"ones.":[238],"provides":[246],"state-of-the-art":[247],"all":[250],"12":[251],"corpora":[252],"three":[254],"international":[255],"bakeoffs.":[259]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":8},{"year":2016,"cited_by_count":9},{"year":2015,"cited_by_count":7},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":10},{"year":2012,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
