{"id":"https://openalex.org/W4220674272","doi":"https://doi.org/10.1162/coli_a_00435","title":"Challenges of Neural Machine Translation for Short Texts","display_name":"Challenges of Neural Machine Translation for Short Texts","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4220674272","doi":"https://doi.org/10.1162/coli_a_00435"},"language":"en","primary_location":{"id":"doi:10.1162/coli_a_00435","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00435","pdf_url":"https://direct.mit.edu/coli/article-pdf/48/2/321/2029089/coli_a_00435.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/coli/article-pdf/48/2/321/2029089/coli_a_00435.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100377278","display_name":"Yu Wan","orcid":"https://orcid.org/0000-0002-7310-7883"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Yu Wan","raw_affiliation_strings":["NLP2CT Lab, University of Macau. nlp2ct.ywan@gmail.com"],"affiliations":[{"raw_affiliation_string":"NLP2CT Lab, University of Macau. nlp2ct.ywan@gmail.com","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028040391","display_name":"Baosong Yang","orcid":"https://orcid.org/0000-0001-5002-2409"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Baosong Yang","raw_affiliation_strings":["Alibaba Group. yangbaosong.ybs@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. yangbaosong.ybs@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101468579","display_name":"Derek F. Wong","orcid":"https://orcid.org/0000-0002-5307-7322"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Derek Fai Wong","raw_affiliation_strings":["NLP2CT Lab, University of Macau. derekfw@um.edu.mo"],"affiliations":[{"raw_affiliation_string":"NLP2CT Lab, University of Macau. derekfw@um.edu.mo","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025832925","display_name":"Lidia S. Chao","orcid":"https://orcid.org/0000-0001-6629-170X"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Lidia Sam Chao","raw_affiliation_strings":["NLP2CT Lab, University of Macau. lidiasc@um.edu.mo"],"affiliations":[{"raw_affiliation_string":"NLP2CT Lab, University of Macau. lidiasc@um.edu.mo","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101613727","display_name":"Liang Yao","orcid":"https://orcid.org/0000-0002-8637-0760"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liang Yao","raw_affiliation_strings":["Alibaba Group. yaoliang.yl@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. yaoliang.yl@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100415757","display_name":"Haibo Zhang","orcid":"https://orcid.org/0000-0002-3752-0806"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Haibo Zhang","raw_affiliation_strings":["Alibaba Group. zhanhui.zhb@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. zhanhui.zhb@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019715118","display_name":"Boxing Chen","orcid":"https://orcid.org/0000-0002-3170-4858"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Boxing Chen","raw_affiliation_strings":["Alibaba Group. boxing.cbx@alibaba-inc.com"],"affiliations":[{"raw_affiliation_string":"Alibaba Group. boxing.cbx@alibaba-inc.com","institution_ids":["https://openalex.org/I4210095624"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5019715118","https://openalex.org/A5025832925","https://openalex.org/A5028040391","https://openalex.org/A5100377278","https://openalex.org/A5100415757","https://openalex.org/A5101468579","https://openalex.org/A5101613727"],"corresponding_institution_ids":["https://openalex.org/I204512498","https://openalex.org/I4210095624"],"apc_list":null,"apc_paid":null,"fwci":4.8587,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.95567861,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"48","issue":"2","first_page":"321","last_page":"342"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9729999899864197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8249987363815308},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7992228269577026},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6008312106132507},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5977151989936829},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5133013129234314},{"id":"https://openalex.org/keywords/intuition","display_name":"Intuition","score":0.4824850857257843},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.45660024881362915},{"id":"https://openalex.org/keywords/rule-based-machine-translation","display_name":"Rule-based machine translation","score":0.44965481758117676},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.41762447357177734},{"id":"https://openalex.org/keywords/dynamic-and-formal-equivalence","display_name":"Dynamic and formal equivalence","score":0.4142954349517822},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3550637364387512},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.15636688470840454}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8249987363815308},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7992228269577026},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6008312106132507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5977151989936829},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5133013129234314},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.4824850857257843},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.45660024881362915},{"id":"https://openalex.org/C53893814","wikidata":"https://www.wikidata.org/wiki/Q7378909","display_name":"Rule-based machine translation","level":2,"score":0.44965481758117676},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.41762447357177734},{"id":"https://openalex.org/C98199350","wikidata":"https://www.wikidata.org/wiki/Q978442","display_name":"Dynamic and formal equivalence","level":3,"score":0.4142954349517822},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3550637364387512},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.15636688470840454},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/coli_a_00435","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00435","pdf_url":"https://direct.mit.edu/coli/article-pdf/48/2/321/2029089/coli_a_00435.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6fd71588b8d94763bbb4063f38ab89a4","is_oa":true,"landing_page_url":"https://doaj.org/article/6fd71588b8d94763bbb4063f38ab89a4","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 48, Iss 2 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/coli_a_00435","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00435","pdf_url":"https://direct.mit.edu/coli/article-pdf/48/2/321/2029089/coli_a_00435.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4220674272.pdf","grobid_xml":"https://content.openalex.org/works/W4220674272.grobid-xml"},"referenced_works_count":89,"referenced_works":["https://openalex.org/W1902237438","https://openalex.org/W2064675550","https://openalex.org/W2095705004","https://openalex.org/W2101105183","https://openalex.org/W2110485445","https://openalex.org/W2146574666","https://openalex.org/W2296073425","https://openalex.org/W2399715921","https://openalex.org/W2525778437","https://openalex.org/W2564861257","https://openalex.org/W2573119710","https://openalex.org/W2741585609","https://openalex.org/W2741838462","https://openalex.org/W2767982226","https://openalex.org/W2888312537","https://openalex.org/W2888519496","https://openalex.org/W2891713103","https://openalex.org/W2898846200","https://openalex.org/W2898856000","https://openalex.org/W2923622379","https://openalex.org/W2951563833","https://openalex.org/W2952446148","https://openalex.org/W2962784628","https://openalex.org/W2962943802","https://openalex.org/W2963218093","https://openalex.org/W2963260202","https://openalex.org/W2963382396","https://openalex.org/W2963463964","https://openalex.org/W2963506925","https://openalex.org/W2963652649","https://openalex.org/W2963665552","https://openalex.org/W2963675284","https://openalex.org/W2963713328","https://openalex.org/W2963807318","https://openalex.org/W2963925437","https://openalex.org/W2964030506","https://openalex.org/W2964199361","https://openalex.org/W2964302946","https://openalex.org/W2965373594","https://openalex.org/W2971302374","https://openalex.org/W2972355302","https://openalex.org/W2983108239","https://openalex.org/W2984500026","https://openalex.org/W2986712369","https://openalex.org/W2988975212","https://openalex.org/W2989156240","https://openalex.org/W2989202544","https://openalex.org/W2997287044","https://openalex.org/W3034201598","https://openalex.org/W3035072529","https://openalex.org/W3035169087","https://openalex.org/W3035473397","https://openalex.org/W3035490055","https://openalex.org/W3099907503","https://openalex.org/W3101155369","https://openalex.org/W3101990291","https://openalex.org/W3115909586","https://openalex.org/W3166829167","https://openalex.org/W3208526031","https://openalex.org/W4205902821","https://openalex.org/W4241645538","https://openalex.org/W6608792757","https://openalex.org/W6617145748","https://openalex.org/W6638827026","https://openalex.org/W6674330103","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6681703963","https://openalex.org/W6685322675","https://openalex.org/W6691654534","https://openalex.org/W6691895530","https://openalex.org/W6727690538","https://openalex.org/W6730949058","https://openalex.org/W6739651123","https://openalex.org/W6739901393","https://openalex.org/W6745859089","https://openalex.org/W6746561596","https://openalex.org/W6747898760","https://openalex.org/W6755845484","https://openalex.org/W6757585730","https://openalex.org/W6766673545","https://openalex.org/W6768061436","https://openalex.org/W6770611941","https://openalex.org/W6778507116","https://openalex.org/W6784550270","https://openalex.org/W6784766206","https://openalex.org/W6788634442","https://openalex.org/W6791280637","https://openalex.org/W6802409534"],"related_works":["https://openalex.org/W1512718085","https://openalex.org/W3163399208","https://openalex.org/W2167662847","https://openalex.org/W1569841287","https://openalex.org/W1978161643","https://openalex.org/W2915828828","https://openalex.org/W2360212709","https://openalex.org/W193726211","https://openalex.org/W3194357474","https://openalex.org/W2942770826"],"abstract_inverted_index":{"Abstract":[0],"Short":[1],"texts":[2],"(STs)":[3],"present":[4],"in":[5,21,104,120,188,199],"a":[6],"variety":[7],"of":[8,17,140],"scenarios,":[9],"including":[10],"query,":[11],"dialog,":[12],"and":[13,49,88,136,153,175,208],"entity":[14],"names.":[15],"Most":[16],"the":[18,73,96,116,138,185],"exciting":[19],"studies":[20],"neural":[22],"machine":[23],"translation":[24,110,181,186,206,210],"(NMT)":[25],"are":[26,53],"focused":[27],"on":[28,134,150],"tackling":[29],"open":[30],"problems":[31],"concerning":[32],"long":[33],"sentences":[34],"rather":[35],"than":[36],"short":[37,51,151],"ones.":[38],"The":[39],"intuition":[40],"behind":[41,98],"is":[42,157],"that,":[43],"with":[44,109],"respect":[45],"to":[46,130,145,195],"human":[47],"learning":[48],"processing,":[50],"sequences":[52],"generally":[54],"regarded":[55],"as":[56],"easy":[57],"examples.":[58],"In":[59],"this":[60,65],"article,":[61],"we":[62,100],"first":[63],"dispel":[64],"speculation":[66],"via":[67],"conducting":[68],"preliminary":[69],"experiments,":[70],"showing":[71],"that":[72],"conventional":[74],"state-of-the-art":[75],"NMT":[76,105,144,155,189,200],"approach,":[77],"namely,":[78],"Transformer":[79],"(Vaswani":[80],"et":[81],"al.":[82],"2017),":[83],"still":[84],"suffers":[85],"from":[86],"over-translation":[87,132],"mistranslation":[89,161],"errors":[90,207],"over":[91,127],"STs.":[92,191],"After":[93],"empirically":[94],"investigating":[95],"rationale":[97],"this,":[99],"summarize":[101],"two":[102],"challenges":[103,198],"for":[106,170,190,201],"STs":[107],"associated":[108],"error":[111],"types":[112],"above,":[113],"respectively:":[114],"(1)":[115],"imbalanced":[117],"length":[118],"distribution":[119,169],"training":[121,171],"set":[122],"intensifies":[123],"model":[124,156],"inference":[125],"calibration":[126],"STs,":[128,202],"leading":[129],"more":[131],"cases":[133],"STs;":[135],"(2)":[137],"lack":[139],"contextual":[141,177],"information":[142,178],"forces":[143],"have":[146],"higher":[147],"data":[148,168,173],"uncertainty":[149],"sentences,":[152],"thus":[154,203],"troubled":[158],"by":[159],"considerable":[160],"errors.":[162],"Some":[163],"existing":[164],"approaches,":[165],"like":[166],"balancing":[167],"(e.g.,":[172,179],"upsampling)":[174],"complementing":[176],"introducing":[180],"memory)":[182],"can":[183],"alleviate":[184],"issues":[187],"We":[192],"encourage":[193],"researchers":[194],"investigate":[196],"other":[197],"reducing":[204],"ST":[205],"enhancing":[209],"quality.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
