{"id":"https://openalex.org/W2069634628","doi":"https://doi.org/10.1145/1363108.1363109","title":"Chinese word segmentation and statistical machine translation","display_name":"Chinese word segmentation and statistical machine translation","publication_year":2008,"publication_date":"2008-05-01","ids":{"openalex":"https://openalex.org/W2069634628","doi":"https://doi.org/10.1145/1363108.1363109","mag":"2069634628"},"language":"en","primary_location":{"id":"doi:10.1145/1363108.1363109","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1363108.1363109","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042810113","display_name":"Ruiqiang Zhang","orcid":"https://orcid.org/0000-0002-9781-7142"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Ruiqiang Zhang","raw_affiliation_strings":["National Institute of Information and Communications Technology, Kyoto, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113449747","display_name":"Keiji Yasuda","orcid":null},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Keiji Yasuda","raw_affiliation_strings":["National Institute of Information and Communications Technology, Kyoto, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033318800","display_name":"Eiichiro Sumita","orcid":"https://orcid.org/0000-0002-1028-4399"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Eiichiro Sumita","raw_affiliation_strings":["National Institute of Information and Communications Technology, Kyoto, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042810113"],"corresponding_institution_ids":["https://openalex.org/I90023481"],"apc_list":null,"apc_paid":null,"fwci":5.8742,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.95758397,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"5","issue":"2","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.813483476638794},{"id":"https://openalex.org/keywords/concatenation","display_name":"Concatenation (mathematics)","score":0.685291051864624},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6843594312667847},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5731084942817688},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5623276233673096},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5197924375534058},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5155760645866394},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.47360754013061523},{"id":"https://openalex.org/keywords/bleu","display_name":"BLEU","score":0.4644826352596283},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44008901715278625},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4373491704463959},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.43542343378067017},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.12887278199195862},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0893283486366272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.813483476638794},{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.685291051864624},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6843594312667847},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5731084942817688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5623276233673096},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5197924375534058},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5155760645866394},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.47360754013061523},{"id":"https://openalex.org/C622187","wikidata":"https://www.wikidata.org/wiki/Q3500773","display_name":"BLEU","level":3,"score":0.4644826352596283},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44008901715278625},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4373491704463959},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.43542343378067017},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.12887278199195862},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0893283486366272},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1363108.1363109","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1363108.1363109","pdf_url":null,"source":{"id":"https://openalex.org/S200945739","display_name":"ACM Transactions on Speech and Language Processing","issn_l":"1550-4875","issn":["1550-4875","1550-4883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Speech and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W839768","https://openalex.org/W25062297","https://openalex.org/W1979145089","https://openalex.org/W1982498087","https://openalex.org/W1988995507","https://openalex.org/W2017802499","https://openalex.org/W2035304092","https://openalex.org/W2036516910","https://openalex.org/W2084050849","https://openalex.org/W2086202918","https://openalex.org/W2101105183","https://openalex.org/W2118563017","https://openalex.org/W2132001515","https://openalex.org/W2146574666","https://openalex.org/W2147880316","https://openalex.org/W2153653739","https://openalex.org/W2156985047","https://openalex.org/W2160538511","https://openalex.org/W2170464899","https://openalex.org/W2291598608","https://openalex.org/W2395243984","https://openalex.org/W2405762604","https://openalex.org/W2467575451"],"related_works":["https://openalex.org/W1925994383","https://openalex.org/W2099607809","https://openalex.org/W2395641992","https://openalex.org/W2807475932","https://openalex.org/W3021126373","https://openalex.org/W4280571180","https://openalex.org/W2903057408","https://openalex.org/W92588874","https://openalex.org/W2963991316","https://openalex.org/W2883671469"],"abstract_inverted_index":{"Chinese":[0],"word":[1],"segmentation":[2],"(CWS)":[3],"is":[4],"a":[5,33,48,57,216,223],"necessary":[6],"step":[7],"in":[8,31,163,227,251],"Chinese-English":[9],"statistical":[10],"machine":[11],"translation":[12,61,235],"(SMT)":[13],"and":[14,40,74,87,107,125,150,153,179,190,200,222],"its":[15],"performance":[16],"has":[17,62],"an":[18],"impact":[19],"on":[20,80,193],"the":[21,70,121,132,136,142,161,164,167,173,176,229,253],"results":[22,129],"of":[23,78,144,166,213,219,232,234,255],"SMT.":[24,75,126],"However,":[25],"there":[26,155],"are":[27,241],"many":[28],"choices":[29,44],"involved":[30],"creating":[32],"CWS":[34,41,50,73,79,88,114,124,145,177,188,239],"system":[35],"such":[36],"as":[37],"various":[38,238],"specifications":[39,86,91,134],"methods.":[42,89],"The":[43,76],"made":[45],"will":[46,55],"create":[47],"new":[49],"scheme,":[51],"but":[52],"whether":[53],"it":[54],"produce":[56],"superior":[58],"or":[59],"inferior":[60],"remained":[63],"unknown":[64],"to":[65,119],"date.":[66],"This":[67,204],"article":[68,205],"examines":[69],"relationship":[71,122],"between":[72,123,160,175],"effects":[77,143],"SMT":[81,180,194],"were":[82,92,248],"investigated":[83],"using":[84],"different":[85,117,214],"Four":[90],"selected":[93],"for":[94,210],"investigation:":[95],"Beijing":[96],"University":[97,102],"(PKU),":[98],"Hong":[99],"Kong":[100],"City":[101],"(CITYU),":[103],"Microsoft":[104],"Research":[105],"(MSR),":[106],"Academia":[108],"SINICA":[109],"(AS).":[110],"We":[111,170,186,244],"created":[112],"16":[113],"schemes":[115,240],"under":[116],"settings":[118],"examine":[120],"Our":[127],"experimental":[128],"showed":[130],"that":[131],"MSR's":[133],"produced":[135],"lowest":[137],"quality":[138,165,254],"translations.":[139,169,256],"In":[140],"examining":[141],"methods,":[146],"we":[147],"tested":[148],"dictionary-based":[149],"CRF-based":[151],"approaches":[152,247],"found":[154,172,245],"was":[156,183],"no":[157],"significant":[158],"difference":[159],"two":[162,208],"resulting":[168],"also":[171,206],"correlation":[174],"F-score":[178],"BLEU":[181],"score":[182],"very":[184,249],"weak.":[185],"analyzed":[187],"errors":[189],"their":[191],"effect":[192],"by":[195],"evaluating":[196],"systems":[197],"trained":[198],"with":[199],"without":[201],"these":[202,246],"errors.":[203],"proposes":[207],"methods":[209],"combining":[211],"advantages":[212],"specifications:":[215],"simple":[217],"concatenation":[218],"training":[220],"data":[221],"feature":[224],"interpolation":[225],"approach":[226],"which":[228],"same":[230],"types":[231],"features":[233],"models":[236],"from":[237],"linearly":[242],"interpolated.":[243],"effective":[250],"improving":[252]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
