{"id":"https://openalex.org/W4367055929","doi":"https://doi.org/10.1145/3594634","title":"Multilingual BERT-based Word Alignment By Incorporating Common Chinese Characters","display_name":"Multilingual BERT-based Word Alignment By Incorporating Common Chinese Characters","publication_year":2023,"publication_date":"2023-04-26","ids":{"openalex":"https://openalex.org/W4367055929","doi":"https://doi.org/10.1145/3594634"},"language":"en","primary_location":{"id":"doi:10.1145/3594634","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3594634","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102015595","display_name":"Zezhong Li","orcid":"https://orcid.org/0000-0003-2257-178X"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zezhong Li","raw_affiliation_strings":["Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088062069","display_name":"Xiao Sun","orcid":"https://orcid.org/0000-0001-9750-7032"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Sun","raw_affiliation_strings":["Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071943346","display_name":"Fuji Ren","orcid":"https://orcid.org/0000-0003-4860-9184"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuji Ren","raw_affiliation_strings":["University of Electronic Science and Technology of China, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102933960","display_name":"Jianjun Ma","orcid":"https://orcid.org/0000-0003-1620-8490"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianjun Ma","raw_affiliation_strings":["Dalian University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046871248","display_name":"Degen Huang","orcid":"https://orcid.org/0000-0002-8860-7805"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Degen Huang","raw_affiliation_strings":["Dalian University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055169822","display_name":"Piao Shi","orcid":"https://orcid.org/0000-0002-0783-5487"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Piao Shi","raw_affiliation_strings":["Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102015595"],"corresponding_institution_ids":["https://openalex.org/I16365422"],"apc_list":null,"apc_paid":null,"fwci":0.5219,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.70073918,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"22","issue":"6","first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8142322897911072},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.666925311088562},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6352171301841736},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6259437799453735},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.613271176815033},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5974482297897339},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.49831128120422363},{"id":"https://openalex.org/keywords/ibm","display_name":"IBM","score":0.4963865876197815},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.4307243824005127},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.421242356300354},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.4177246689796448},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36550426483154297},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1952897012233734}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8142322897911072},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.666925311088562},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6352171301841736},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6259437799453735},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.613271176815033},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5974482297897339},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.49831128120422363},{"id":"https://openalex.org/C70388272","wikidata":"https://www.wikidata.org/wiki/Q5968558","display_name":"IBM","level":2,"score":0.4963865876197815},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.4307243824005127},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.421242356300354},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.4177246689796448},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36550426483154297},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1952897012233734},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3594634","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3594634","pdf_url":null,"source":{"id":"https://openalex.org/S4306421405","display_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","issn_l":"2375-4699","issn":["2375-4699","2375-4702"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian and Low-Resource Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7400000095367432}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1614471940","display_name":null,"funder_award_id":"2020AAA0","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5630136037","display_name":null,"funder_award_id":"2020AAA0108004","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8571511556","display_name":null,"funder_award_id":"61976078","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1966253631","https://openalex.org/W2121879602","https://openalex.org/W2156985047","https://openalex.org/W2408090783","https://openalex.org/W2740510699","https://openalex.org/W2893015365","https://openalex.org/W2907252220","https://openalex.org/W2912070261","https://openalex.org/W2964015966","https://openalex.org/W2966610483","https://openalex.org/W2970045405","https://openalex.org/W2998215494","https://openalex.org/W3016973796","https://openalex.org/W3102425047","https://openalex.org/W3103942011","https://openalex.org/W3104881680","https://openalex.org/W3105813095","https://openalex.org/W4229056828"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2953234277","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W1987128138"],"abstract_inverted_index":{"Word":[0],"alignment":[1,16,97],"is":[2,17,113],"an":[3,124],"important":[4],"task":[5],"of":[6,32,55,59,76,95,107,197,204],"detecting":[7],"translation":[8],"equivalents":[9],"between":[10,98],"a":[11,30,74,104,176],"sentence":[12],"pair.":[13],"Although":[14],"word":[15,47,96,161,181],"no":[18],"longer":[19],"necessarily":[20],"needed":[21],"for":[22,126,179],"neural":[23,84],"machine":[24],"translation,":[25],"it\u2019s":[26],"still":[27,50],"useful":[28],"in":[29,69,78,120,195],"wealth":[31],"applications,":[33],"e.g.,":[34],"bilingual":[35],"lexicon":[36],"induction,":[37],"constraint":[38],"decoding,":[39],"and":[40,52,100,132,168,174,200],"so":[41],"on.":[42],"However,":[43],"the":[44,67,80,130,136,159,185,202],"most":[45,142],"well-known":[46],"aligners":[48],"are":[49,57],"Giza++":[51],"fastAlign,":[53],"both":[54,121],"which":[56,102],"implementations":[58],"traditional":[60],"IBM":[61,81],"models.":[62,85],"To":[63],"keep":[64],"pace":[65],"with":[66,83,135],"advance":[68],"NMT,":[70],"there":[71],"has":[72],"been":[73],"surge":[75],"interest":[77],"replacing":[79],"models":[82],"We":[86],"follow":[87],"this":[88,146],"trend":[89],"but":[90],"aim":[91],"to":[92,114,157],"boost":[93,158],"performance":[94],"Japanese":[99],"Chinese,":[101],"share":[103],"large":[105],"portion":[106],"Chinese":[108,118,138,155,207],"characters.":[109,208],"Our":[110],"key":[111],"idea":[112],"leverage":[115,153],"these":[116],"common":[117,137,154,206],"characters":[119,139,156],"languages":[122],"as":[123],"indicator":[125],"inferring":[127],"alignment;":[128],"i.e.,":[129],"source":[131],"target":[133],"words":[134],"should":[140],"be":[141],"likely":[143],"aligned.":[144],"Following":[145],"idea,":[147],"we":[148,172],"propose":[149],"three":[150],"methods":[151,190],"that":[152,188],"mBERT-based":[160],"alignment,":[162,167],"including":[163],"reward":[164],"factor,":[165],"representation":[166],"contrastive":[169],"training.":[170],"Furthermore,":[171],"annotate":[173],"release":[175],"golden":[177],"dataset":[178,186],"Japanese-Chinese":[180],"alignment.":[182],"Experiments":[183],"on":[184],"show":[187],"our":[189],"outperform":[191],"several":[192],"strong":[193],"baselines":[194],"terms":[196],"AER":[198],"score":[199],"verify":[201],"effectiveness":[203],"exploiting":[205]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
