{"id":"https://openalex.org/W2251066033","doi":"https://doi.org/10.3115/v1/p14-1128","title":"Toward Better Chinese Word Segmentation for SMT via Bilingual Constraints","display_name":"Toward Better Chinese Word Segmentation for SMT via Bilingual Constraints","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2251066033","doi":"https://doi.org/10.3115/v1/p14-1128","mag":"2251066033"},"language":"en","primary_location":{"id":"doi:10.3115/v1/p14-1128","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1128","pdf_url":"https://aclanthology.org/P14-1128.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/P14-1128.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059787539","display_name":"Xiaodong Zeng","orcid":"https://orcid.org/0000-0001-7638-5443"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Xiaodong Zeng","raw_affiliation_strings":["CT Lab / Department of Computer and Information Science, University of Macau"],"affiliations":[{"raw_affiliation_string":"CT Lab / Department of Computer and Information Science, University of Macau","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025832925","display_name":"Lidia S. Chao","orcid":"https://orcid.org/0000-0001-6629-170X"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Lidia S. Chao","raw_affiliation_strings":["CT Lab / Department of Computer and Information Science, University of Macau"],"affiliations":[{"raw_affiliation_string":"CT Lab / Department of Computer and Information Science, University of Macau","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101468579","display_name":"Derek F. Wong","orcid":"https://orcid.org/0000-0002-5307-7322"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Derek F. Wong","raw_affiliation_strings":["CT Lab / Department of Computer and Information Science, University of Macau"],"affiliations":[{"raw_affiliation_string":"CT Lab / Department of Computer and Information Science, University of Macau","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054590402","display_name":"Isabel Trancoso","orcid":"https://orcid.org/0000-0001-5874-6313"},"institutions":[{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":null,"type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]},{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Isabel Trancoso","raw_affiliation_strings":["INESC-ID / Instituto Superior Tnico, Lisboa, Portugal","INESC-ID / Instituto Superior T\u00e9nico, Lisboa, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC-ID / Instituto Superior Tnico, Lisboa, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"INESC-ID / Instituto Superior T\u00e9nico, Lisboa, Portugal","institution_ids":["https://openalex.org/I121345201","https://openalex.org/I4387152517"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101634356","display_name":"Tian Liang","orcid":"https://orcid.org/0000-0002-6744-8294"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Liang Tian","raw_affiliation_strings":["CT Lab / Department of Computer and Information Science, University of Macau"],"affiliations":[{"raw_affiliation_string":"CT Lab / Department of Computer and Information Science, University of Macau","institution_ids":["https://openalex.org/I204512498"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5059787539"],"corresponding_institution_ids":["https://openalex.org/I204512498"],"apc_list":null,"apc_paid":null,"fwci":2.9687,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.92589506,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1360","last_page":"1369"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9815999865531921,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8253066539764404},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7241408228874207},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6526769399642944},{"id":"https://openalex.org/keywords/treebank","display_name":"Treebank","score":0.6439944505691528},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.608080267906189},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.60725337266922},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.584632933139801},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5743938088417053},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5207064747810364},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.47133469581604004},{"id":"https://openalex.org/keywords/crfs","display_name":"CRFS","score":0.45713719725608826},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.41408833861351013},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.389769047498703},{"id":"https://openalex.org/keywords/conditional-random-field","display_name":"Conditional random field","score":0.3411768078804016},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.27165061235427856},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08619490265846252}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8253066539764404},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7241408228874207},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6526769399642944},{"id":"https://openalex.org/C206134035","wikidata":"https://www.wikidata.org/wiki/Q811525","display_name":"Treebank","level":3,"score":0.6439944505691528},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.608080267906189},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.60725337266922},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.584632933139801},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5743938088417053},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5207064747810364},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47133469581604004},{"id":"https://openalex.org/C2775953691","wikidata":"https://www.wikidata.org/wiki/Q5013874","display_name":"CRFS","level":3,"score":0.45713719725608826},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.41408833861351013},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.389769047498703},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.3411768078804016},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.27165061235427856},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08619490265846252},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3115/v1/p14-1128","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1128","pdf_url":"https://aclanthology.org/P14-1128.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.3115/v1/p14-1128","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/p14-1128","pdf_url":"https://aclanthology.org/P14-1128.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6700000166893005}],"awards":[{"id":"https://openalex.org/G2292569425","display_name":null,"funder_award_id":"MYRG076 (Y1-L2)-FST13-WF","funder_id":"https://openalex.org/F4320322841","funder_display_name":"Universidade de Macau"},{"id":"https://openalex.org/G3249123417","display_name":null,"funder_award_id":"Y1-L2","funder_id":"https://openalex.org/F4320322841","funder_display_name":"Universidade de Macau"},{"id":"https://openalex.org/G8461126762","display_name":null,"funder_award_id":"MYRG070 (Y1-L2)-FST12-CS","funder_id":"https://openalex.org/F4320322841","funder_display_name":"Universidade de Macau"},{"id":"https://openalex.org/G943999617","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320322841","display_name":"Universidade de Macau","ror":"https://ror.org/01r4q9n85"},{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2251066033.pdf","grobid_xml":"https://content.openalex.org/works/W2251066033.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W61894391","https://openalex.org/W1412698887","https://openalex.org/W1631260214","https://openalex.org/W1709989312","https://openalex.org/W1902922832","https://openalex.org/W1982498087","https://openalex.org/W2075508846","https://openalex.org/W2088073093","https://openalex.org/W2095758845","https://openalex.org/W2096204319","https://openalex.org/W2099048402","https://openalex.org/W2101105183","https://openalex.org/W2115498396","https://openalex.org/W2121524931","https://openalex.org/W2123301721","https://openalex.org/W2124807415","https://openalex.org/W2127626780","https://openalex.org/W2131988669","https://openalex.org/W2134134392","https://openalex.org/W2139823104","https://openalex.org/W2142523187","https://openalex.org/W2144783305","https://openalex.org/W2146574666","https://openalex.org/W2149230967","https://openalex.org/W2151197196","https://openalex.org/W2154368244","https://openalex.org/W2156985047","https://openalex.org/W2157875692","https://openalex.org/W2167768673","https://openalex.org/W2186188534","https://openalex.org/W2250876691","https://openalex.org/W2252264945","https://openalex.org/W2595715041","https://openalex.org/W2787109023","https://openalex.org/W3010865323","https://openalex.org/W3151142710","https://openalex.org/W4250803253"],"related_works":["https://openalex.org/W2055466819","https://openalex.org/W50079190","https://openalex.org/W2356597680","https://openalex.org/W182104056","https://openalex.org/W2111726165","https://openalex.org/W2011251309","https://openalex.org/W3108423214","https://openalex.org/W2796133761","https://openalex.org/W3088215229","https://openalex.org/W2511246383"],"abstract_inverted_index":{"This":[0],"study":[1],"investigates":[2],"on":[3,61,91],"building":[4],"a":[5,30,51,74,92],"better":[6],"Chinese":[7],"word":[8,19,40,68],"segmentation":[9,32,105],"model":[10,80,101],"for":[11],"statistical":[12],"machine":[13,94],"translation.":[14],"It":[15],"aims":[16],"at":[17],"leveraging":[18],"boundary":[20,69],"information,":[21],"automatically":[22],"learned":[23],"by":[24,56,84],"bilingual":[25,63],"character-based":[26],"alignments,":[27],"to":[28,45,107],"induce":[29],"preferable":[31],"model.":[33],"We":[34],"propose":[35],"dealing":[36],"with":[37],"the":[38,47,57,62,99],"induced":[39,67],"boundaries":[41],"as":[42,73],"soft":[43],"constraints":[44],"bias":[46],"continuous":[48],"learning":[49],"of":[50],"supervised":[52],"CRFs":[53],"model,":[54],"trained":[55],"treebank":[58],"data":[59,64],"(labeled),":[60],"(unlabeled).":[65],"The":[66,78,89],"information":[70],"is":[71,82],"encoded":[72],"graph":[75],"propagation":[76],"constraint.":[77],"constrained":[79],"induction":[81],"accomplished":[83],"using":[85],"posterior":[86],"regularization":[87],"algorithm.":[88],"experiments":[90],"Chinese-to-English":[93],"translation":[95,108],"task":[96],"reveal":[97],"that":[98],"proposed":[100],"can":[102],"bring":[103],"positive":[104],"effects":[106],"quality.":[109]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2}],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
