{"id":"https://openalex.org/W4310608684","doi":"https://doi.org/10.1109/ialp57159.2022.9961276","title":"Ensemble Machine Translation to Filter Low Quality Corpus","display_name":"Ensemble Machine Translation to Filter Low Quality Corpus","publication_year":2022,"publication_date":"2022-10-27","ids":{"openalex":"https://openalex.org/W4310608684","doi":"https://doi.org/10.1109/ialp57159.2022.9961276"},"language":"en","primary_location":{"id":"doi:10.1109/ialp57159.2022.9961276","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ialp57159.2022.9961276","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Asian Language Processing (IALP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043098825","display_name":"Wuying Liu","orcid":"https://orcid.org/0009-0004-4641-8012"},"institutions":[{"id":"https://openalex.org/I186272606","display_name":"Guangdong University of Foreign Studies","ror":"https://ror.org/00fhc9y79","country_code":"CN","type":"education","lineage":["https://openalex.org/I186272606"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wuying Liu","raw_affiliation_strings":["Guangdong University of Foreign Studies,Laboratory of Language Engineering and Computing,Guangzhou,China,510420"],"affiliations":[{"raw_affiliation_string":"Guangdong University of Foreign Studies,Laboratory of Language Engineering and Computing,Guangzhou,China,510420","institution_ids":["https://openalex.org/I186272606"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100403242","display_name":"Lin Wang","orcid":"https://orcid.org/0000-0003-2374-0725"},"institutions":[{"id":"https://openalex.org/I11406153","display_name":"Shanghai International Studies University","ror":"https://ror.org/01bn89z48","country_code":"CN","type":"education","lineage":["https://openalex.org/I11406153"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Wang","raw_affiliation_strings":["Xianda College of Economics and Humanities, Shanghai International Studies University,Shanghai,China,200083"],"affiliations":[{"raw_affiliation_string":"Xianda College of Economics and Humanities, Shanghai International Studies University,Shanghai,China,200083","institution_ids":["https://openalex.org/I11406153"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5043098825"],"corresponding_institution_ids":["https://openalex.org/I186272606"],"apc_list":null,"apc_paid":null,"fwci":0.1326,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.54169666,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"30","issue":null,"first_page":"500","last_page":"504"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10759","display_name":"Translation Studies and Practices","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8457393646240234},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7737877368927002},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6125754117965698},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5525698661804199},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5096004009246826},{"id":"https://openalex.org/keywords/language-translation","display_name":"Language translation","score":0.49989771842956543},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.48967722058296204},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47603726387023926},{"id":"https://openalex.org/keywords/levenshtein-distance","display_name":"Levenshtein distance","score":0.4737834930419922},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4572747051715851},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44530221819877625},{"id":"https://openalex.org/keywords/transfer-based-machine-translation","display_name":"Transfer-based machine translation","score":0.4223543405532837},{"id":"https://openalex.org/keywords/example-based-machine-translation","display_name":"Example-based machine translation","score":0.3988632559776306},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3219967484474182}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8457393646240234},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7737877368927002},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6125754117965698},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5525698661804199},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5096004009246826},{"id":"https://openalex.org/C2986862884","wikidata":"https://www.wikidata.org/wiki/Q7553","display_name":"Language translation","level":3,"score":0.49989771842956543},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.48967722058296204},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47603726387023926},{"id":"https://openalex.org/C2777515626","wikidata":"https://www.wikidata.org/wiki/Q496939","display_name":"Levenshtein distance","level":2,"score":0.4737834930419922},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4572747051715851},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44530221819877625},{"id":"https://openalex.org/C130597682","wikidata":"https://www.wikidata.org/wiki/Q6961922","display_name":"Transfer-based machine translation","level":4,"score":0.4223543405532837},{"id":"https://openalex.org/C24687705","wikidata":"https://www.wikidata.org/wiki/Q3753284","display_name":"Example-based machine translation","level":3,"score":0.3988632559776306},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3219967484474182},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ialp57159.2022.9961276","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ialp57159.2022.9961276","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Asian Language Processing (IALP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5299999713897705}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1534477342","https://openalex.org/W1663984431","https://openalex.org/W1910131649","https://openalex.org/W2154124206","https://openalex.org/W2250606284","https://openalex.org/W2353026543","https://openalex.org/W2394767182","https://openalex.org/W3203151029","https://openalex.org/W4288284086","https://openalex.org/W4385245566","https://openalex.org/W6682759559","https://openalex.org/W6691438880","https://openalex.org/W6711922366","https://openalex.org/W6739901393","https://openalex.org/W6765386044","https://openalex.org/W6801720629","https://openalex.org/W7024479735"],"related_works":["https://openalex.org/W1559710535","https://openalex.org/W2027317339","https://openalex.org/W4312364893","https://openalex.org/W4283020986","https://openalex.org/W2997015083","https://openalex.org/W4318811770","https://openalex.org/W4362495644","https://openalex.org/W2345966427","https://openalex.org/W3214198970","https://openalex.org/W4306816465"],"abstract_inverted_index":{"The":[0,22],"popularization":[1],"of":[2,9,18,24,35,66,77,97,140,153,162],"ubiquitous":[3],"computing":[4],"equipment":[5],"and":[6,52,86,102,111,113,121,126,145,167],"the":[7,16,33,73,82,95,115,133,138,146,151,159],"development":[8],"network":[10],"communication":[11],"technology":[12],"have":[13],"greatly":[14],"improved":[15],"productivity":[17],"human":[19],"language":[20,26],"information.":[21],"quality":[23,43,59],"large-scale":[25,46],"information":[27],"is":[28,49],"a":[29,63,104,123],"key":[30],"factor":[31],"affecting":[32],"effectiveness":[34],"deep":[36],"learning":[37],"algorithms.":[38],"How":[39],"to":[40,56],"filter":[41],"low":[42],"corpus":[44,47,60,175],"from":[45,114],"that":[48,132],"unstructured,":[50],"non-standardized,":[51],"even":[53],"contains":[54],"fallacies,":[55],"obtain":[57],"high":[58],"has":[61],"become":[62],"research":[64],"issue":[65,76],"great":[67],"application":[68],"value.":[69],"We":[70],"focus":[71],"on":[72],"specific":[74],"filtering":[75,84,92,109],"bilingual":[78],"parallel":[79],"corpus,":[80],"re-examine":[81],"manual":[83],"process,":[85],"propose":[87],"an":[88],"ensemble":[89,106,170],"machine":[90,107,142,155],"translation":[91,98,108,116,143,156],"idea.":[93],"From":[94],"perspective":[96],"direction,":[99],"we":[100,119],"design":[101,120],"implement":[103,122,172],"single-engine-based":[105,134],"framework":[110,125],"algorithm,":[112],"system":[117],"perspective,":[118],"multi-engine-based":[124,147],"algorithm":[127,135],"respectively.":[128],"Experimental":[129],"results":[130],"show":[131],"can":[136,149,171],"integrate":[137,150],"advantages":[139,152],"different":[141,154],"directions,":[144],"one":[148],"systems.":[157],"Using":[158],"straightforward":[160],"methods":[161],"Levenshtein":[163],"string":[164],"morphological":[165],"similarity":[166],"linear":[168],"weighted":[169],"efficient":[173],"industrial-grade":[174],"filtering.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
