{"id":"https://openalex.org/W2949405462","doi":"https://doi.org/10.18653/v1/p19-1118","title":"Unsupervised Parallel Sentence Extraction with Parallel Segment Detection Helps Machine Translation","display_name":"Unsupervised Parallel Sentence Extraction with Parallel Segment Detection Helps Machine Translation","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2949405462","doi":"https://doi.org/10.18653/v1/p19-1118","mag":"2949405462"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1118","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1118","pdf_url":"https://www.aclweb.org/anthology/P19-1118.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1118.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090950630","display_name":"Viktor Hangya","orcid":"https://orcid.org/0000-0002-5144-3069"},"institutions":[{"id":"https://openalex.org/I3018771216","display_name":"LMU Klinikum","ror":"https://ror.org/02jet3w32","country_code":"DE","type":"healthcare","lineage":["https://openalex.org/I3018771216","https://openalex.org/I8204097"]},{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Viktor Hangya","raw_affiliation_strings":["Center for Information and Language Processing LMU Munich , Germany","Center for Information and Language Processing LMU Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Center for Information and Language Processing LMU Munich , Germany","institution_ids":["https://openalex.org/I3018771216","https://openalex.org/I8204097"]},{"raw_affiliation_string":"Center for Information and Language Processing LMU Munich, Germany","institution_ids":["https://openalex.org/I8204097"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101957153","display_name":"Alexander Fraser","orcid":"https://orcid.org/0000-0003-4891-682X"},"institutions":[{"id":"https://openalex.org/I3018771216","display_name":"LMU Klinikum","ror":"https://ror.org/02jet3w32","country_code":"DE","type":"healthcare","lineage":["https://openalex.org/I3018771216","https://openalex.org/I8204097"]},{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander Fraser","raw_affiliation_strings":["Center for Information and Language Processing LMU Munich , Germany","Center for Information and Language Processing LMU Munich, Germany"],"affiliations":[{"raw_affiliation_string":"Center for Information and Language Processing LMU Munich , Germany","institution_ids":["https://openalex.org/I3018771216","https://openalex.org/I8204097"]},{"raw_affiliation_string":"Center for Information and Language Processing LMU Munich, Germany","institution_ids":["https://openalex.org/I8204097"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5090950630"],"corresponding_institution_ids":["https://openalex.org/I3018771216","https://openalex.org/I8204097"],"apc_list":null,"apc_paid":null,"fwci":3.3239,"has_fulltext":true,"cited_by_count":29,"citation_normalized_percentile":{"value":0.93885245,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1224","last_page":"1234"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8716922998428345},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7122430801391602},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6432369351387024},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6211994290351868},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.5941944122314453},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5880674123764038},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5792495608329773},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5427970290184021},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.43756014108657837}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8716922998428345},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7122430801391602},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6432369351387024},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6211994290351868},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.5941944122314453},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5880674123764038},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5792495608329773},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5427970290184021},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.43756014108657837},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/p19-1118","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1118","pdf_url":"https://www.aclweb.org/anthology/P19-1118.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1118","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1118","pdf_url":"https://www.aclweb.org/anthology/P19-1118.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5036817778","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innov","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5399556803","display_name":null,"funder_award_id":"This project has received funding from the Europea","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G6199968644","display_name":"Domain Adaptation for Statistical Machine Translation","funder_award_id":"640550","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8318064016","display_name":null,"funder_award_id":"Horizon","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8633428685","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innovat","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2949405462.pdf","grobid_xml":"https://content.openalex.org/works/W2949405462.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W8895266","https://openalex.org/W342285082","https://openalex.org/W658020064","https://openalex.org/W1542713999","https://openalex.org/W1828724394","https://openalex.org/W2105673178","https://openalex.org/W2126725946","https://openalex.org/W2133564696","https://openalex.org/W2140903445","https://openalex.org/W2222512263","https://openalex.org/W2252212383","https://openalex.org/W2257408573","https://openalex.org/W2294774419","https://openalex.org/W2493916176","https://openalex.org/W2496235729","https://openalex.org/W2512924740","https://openalex.org/W2593864460","https://openalex.org/W2739740656","https://openalex.org/W2740534027","https://openalex.org/W2742155240","https://openalex.org/W2773493195","https://openalex.org/W2794365787","https://openalex.org/W2798389157","https://openalex.org/W2798931235","https://openalex.org/W2799245424","https://openalex.org/W2804625779","https://openalex.org/W2890007195","https://openalex.org/W2952037945","https://openalex.org/W2962824887","https://openalex.org/W2963002901","https://openalex.org/W2963061446","https://openalex.org/W2963118869","https://openalex.org/W2963216553","https://openalex.org/W2963472233","https://openalex.org/W2963602293","https://openalex.org/W2964013027","https://openalex.org/W2964266061","https://openalex.org/W2964308564","https://openalex.org/W4292692470","https://openalex.org/W4298393544","https://openalex.org/W4299579390","https://openalex.org/W4299585995"],"related_works":["https://openalex.org/W2786253471","https://openalex.org/W2990400634","https://openalex.org/W3175595715","https://openalex.org/W4293584592","https://openalex.org/W2986030184","https://openalex.org/W2104907655","https://openalex.org/W4287212313","https://openalex.org/W3155572818","https://openalex.org/W2985215540","https://openalex.org/W2963357083"],"abstract_inverted_index":{"Mining":[0],"parallel":[1,18,40,73,85,110],"sentences":[2,41,63,111],"from":[3,113],"comparable":[4],"corpora":[5],"is":[6,23,59,121],"important.Most":[7],"previous":[8],"work":[9],"relies":[10],"on":[11,17,43,56,81,91,100],"supervised":[12],"systems,":[13],"which":[14],"are":[15],"trained":[16],"data,":[19],"thus":[20],"their":[21],"applicability":[22],"problematic":[24],"in":[25,29,75,95],"low-resource":[26,131],"scenarios.Recent":[27],"developments":[28],"building":[30],"unsupervised":[31,118],"bilingual":[32],"word":[33],"embeddings":[34],"made":[35],"it":[36,125],"possible":[37],"to":[38,129],"mine":[39],"based":[42],"cosine":[44],"similarities":[45],"of":[46],"source":[47],"and":[48,79],"target":[49],"language":[50,93],"words.We":[51],"show":[52,87],"that":[53,109],"relying":[54],"only":[55],"this":[57],"information":[58],"not":[60],"enough,":[61],"since":[62],"often":[64],"have":[65],"similar":[66],"words":[67],"but":[68],"different":[69],"meanings.We":[70],"detect":[71],"continuous":[72],"segments":[74],"sentence":[76],"pair":[77],"candidates":[78],"rely":[80],"them":[82],"when":[83],"mining":[84,89],"sentences.We":[86],"better":[88],"accuracy":[90],"three":[92],"pairs":[94],"a":[96],"standard":[97],"shared":[98],"task":[99],"artificial":[101],"data.We":[102],"also":[103],"provide":[104],"the":[105],"first":[106],"experiments":[107],"showing":[108],"mined":[112],"real":[114],"life":[115],"sources":[116],"improve":[117],"MT.Our":[119],"code":[120],"available,":[122],"we":[123],"hope":[124],"will":[126],"be":[127],"used":[128],"support":[130],"MT":[132],"research.":[133]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
