{"id":"https://openalex.org/W2107695330","doi":"https://doi.org/10.1162/089120105775299168","title":"Improving Machine Translation Performance by Exploiting Non-Parallel Corpora","display_name":"Improving Machine Translation Performance by Exploiting Non-Parallel Corpora","publication_year":2005,"publication_date":"2005-12-01","ids":{"openalex":"https://openalex.org/W2107695330","doi":"https://doi.org/10.1162/089120105775299168","mag":"2107695330"},"language":"en","primary_location":{"id":"doi:10.1162/089120105775299168","is_oa":true,"landing_page_url":"https://doi.org/10.1162/089120105775299168","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/089120105775299168","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/089120105775299168","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072829860","display_name":"Dragos Stefan Munteanu","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dragos Stefan Munteanu","raw_affiliation_strings":["Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292","Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292#TAB#","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005020376","display_name":"Daniel Marcu","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Marcu","raw_affiliation_strings":["Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292","Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Information Sciences Institute, University of Southern California, 4676 Admiralty Way, Suite 1001, Marina del Rey, CA 90292#TAB#","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5072829860"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":10.5669,"has_fulltext":true,"cited_by_count":392,"citation_normalized_percentile":{"value":0.982275,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":100},"biblio":{"volume":"31","issue":"4","first_page":"477","last_page":"504"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9868999719619751,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9066667556762695},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.836001992225647},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.7510660886764526},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6740276217460632},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6153043508529663},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5477447509765625},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.5361509323120117},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.5248363614082336},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.5176801681518555},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.43356311321258545},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12535837292671204},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08198937773704529}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9066667556762695},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.836001992225647},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.7510660886764526},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6740276217460632},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6153043508529663},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5477447509765625},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.5361509323120117},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.5248363614082336},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.5176801681518555},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.43356311321258545},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12535837292671204},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08198937773704529},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1162/089120105775299168","is_oa":true,"landing_page_url":"https://doi.org/10.1162/089120105775299168","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/089120105775299168","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.100.3392","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.100.3392","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.mt-archive.info/CL-2006-Munteanu.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.329.1486","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.329.1486","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.aclweb.org/anthology/J/J05/J05-4003.pdf","raw_type":"text"},{"id":"pmh:oai:doaj.org/article:19004e2a14c94fe1baf372a647dc9f74","is_oa":false,"landing_page_url":"https://doaj.org/article/19004e2a14c94fe1baf372a647dc9f74","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 31, Iss 4 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/089120105775299168","is_oa":true,"landing_page_url":"https://doi.org/10.1162/089120105775299168","pdf_url":"http://www.mitpressjournals.org/doi/pdf/10.1162/089120105775299168","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7599999904632568}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320308668","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2107695330.pdf","grobid_xml":"https://content.openalex.org/works/W2107695330.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W8895266","https://openalex.org/W13944552","https://openalex.org/W21951956","https://openalex.org/W203603362","https://openalex.org/W222053410","https://openalex.org/W1489181569","https://openalex.org/W1494864219","https://openalex.org/W1497763703","https://openalex.org/W1608419360","https://openalex.org/W1819903106","https://openalex.org/W1889220380","https://openalex.org/W1986098702","https://openalex.org/W2001792610","https://openalex.org/W2006969979","https://openalex.org/W2007433094","https://openalex.org/W2007709031","https://openalex.org/W2014415866","https://openalex.org/W2016630033","https://openalex.org/W2041232209","https://openalex.org/W2047295649","https://openalex.org/W2061235289","https://openalex.org/W2065627366","https://openalex.org/W2079442239","https://openalex.org/W2097333193","https://openalex.org/W2101105183","https://openalex.org/W2102667697","https://openalex.org/W2102749417","https://openalex.org/W2104103102","https://openalex.org/W2108997961","https://openalex.org/W2119788759","https://openalex.org/W2133837072","https://openalex.org/W2138247936","https://openalex.org/W2146574666","https://openalex.org/W2154124206","https://openalex.org/W2156170481","https://openalex.org/W2156985047","https://openalex.org/W2165691108","https://openalex.org/W2166098990","https://openalex.org/W2172138510","https://openalex.org/W4205167092","https://openalex.org/W4241850027"],"related_works":["https://openalex.org/W2786253471","https://openalex.org/W3175595715","https://openalex.org/W2154124206","https://openalex.org/W2797913374","https://openalex.org/W2604275745","https://openalex.org/W2986030184","https://openalex.org/W2104907655","https://openalex.org/W2985215540","https://openalex.org/W4307459710","https://openalex.org/W3155572818"],"abstract_inverted_index":{"We":[0,13,53,75],"present":[1],"a":[2,15,21,69,79,91,100],"novel":[3],"method":[4,106],"for":[5,116],"discovering":[6],"parallel":[7,42,94],"sentences":[8],"in":[9],"comparable,":[10],"non-parallel":[11,50,102],"corpora.":[12,52],"train":[14],"maximum":[16],"entropy":[17],"classifier":[18],"that,":[19],"given":[20],"pair":[22],"of":[23,34,57,68],"sentences,":[24],"can":[25,83,107],"reliably":[26],"determine":[27],"whether":[28],"or":[29],"not":[30],"they":[31],"are":[32,121],"translations":[33],"each":[35],"other.":[36],"Using":[37],"this":[38],"approach,":[39],"we":[40],"extract":[41],"data":[43,60],"from":[44,86],"large":[45,101],"Chinese,":[46],"Arabic,":[47],"and":[48,98],"English":[49],"newspaper":[51],"evaluate":[54],"the":[55,58,66],"quality":[56],"extracted":[59],"by":[61,88],"showing":[62],"that":[63,78],"it":[64],"improves":[65],"performance":[67],"state-of-the-art":[70],"statistical":[71],"machine":[72],"translation":[73],"system.":[74],"also":[76],"show":[77],"good-quality":[80],"MT":[81],"system":[82],"be":[84,108],"built":[85],"scratch":[87],"starting":[89],"with":[90,110],"very":[92],"small":[93],"corpus":[95],"(100,000":[96],"words)":[97],"exploiting":[99],"corpus.":[103],"Thus,":[104],"our":[105],"applied":[109],"great":[111],"benefit":[112],"to":[113],"language":[114],"pairs":[115],"which":[117],"only":[118],"scarce":[119],"resources":[120],"available.":[122]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":19},{"year":2019,"cited_by_count":27},{"year":2018,"cited_by_count":24},{"year":2017,"cited_by_count":27},{"year":2016,"cited_by_count":27},{"year":2015,"cited_by_count":15},{"year":2014,"cited_by_count":31},{"year":2013,"cited_by_count":44},{"year":2012,"cited_by_count":42}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
