{"id":"https://openalex.org/W2517504876","doi":"https://doi.org/10.18653/v1/w16-2366","title":"YODA System for WMT16 Shared Task: Bilingual Document Alignment","display_name":"YODA System for WMT16 Shared Task: Bilingual Document Alignment","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2517504876","doi":"https://doi.org/10.18653/v1/w16-2366","mag":"2517504876"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-2366","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2366","pdf_url":"https://www.aclweb.org/anthology/W16-2366.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Conference on Machine Translation: Volume 2,\n          Shared Task Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-2366.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029174444","display_name":"Aswarth Abhilash Dara","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aswarth Abhilash Dara","raw_affiliation_strings":["Language Technologies Institute Carnegie Mellon University 5000 Forbes Ave, Pittsburgh, PA 15213, USA"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute Carnegie Mellon University 5000 Forbes Ave, Pittsburgh, PA 15213, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044209843","display_name":"Yiu-Chang Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yiu-Chang Lin","raw_affiliation_strings":["Language Technologies Institute Carnegie Mellon University 5000 Forbes Ave, Pittsburgh, PA 15213, USA"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute Carnegie Mellon University 5000 Forbes Ave, Pittsburgh, PA 15213, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5044209843"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.2854,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.87046216,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"679","last_page":"684"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8481067419052124},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7042070627212524},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.6440572142601013},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.59860759973526},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5697649717330933},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5543948411941528},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5038577914237976},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5016920566558838},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.4443938136100769},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.44102591276168823},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39674270153045654},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.36399298906326294},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35511595010757446},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.13487160205841064}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8481067419052124},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7042070627212524},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.6440572142601013},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.59860759973526},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5697649717330933},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5543948411941528},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5038577914237976},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5016920566558838},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.4443938136100769},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.44102591276168823},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39674270153045654},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.36399298906326294},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35511595010757446},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.13487160205841064},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w16-2366","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2366","pdf_url":"https://www.aclweb.org/anthology/W16-2366.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Conference on Machine Translation: Volume 2,\n          Shared Task Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-2366","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2366","pdf_url":"https://www.aclweb.org/anthology/W16-2366.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Conference on Machine Translation: Volume 2,\n          Shared Task Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7200000286102295,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2517504876.pdf","grobid_xml":"https://content.openalex.org/works/W2517504876.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W189596042","https://openalex.org/W1523385540","https://openalex.org/W1819903106","https://openalex.org/W2070150502","https://openalex.org/W2101096097","https://openalex.org/W2101105183","https://openalex.org/W2136189984","https://openalex.org/W2145094598","https://openalex.org/W2149327368","https://openalex.org/W2157331557","https://openalex.org/W2997574889"],"related_works":["https://openalex.org/W1979144454","https://openalex.org/W2358294942","https://openalex.org/W2069679074","https://openalex.org/W4239898202","https://openalex.org/W2115253914","https://openalex.org/W1559090489","https://openalex.org/W3116613346","https://openalex.org/W2055154498","https://openalex.org/W2316185946","https://openalex.org/W2351131669"],"abstract_inverted_index":{"In":[0],"this":[1,96,108,164],"paper,":[2],"we":[3,56],"address":[4],"the":[5,10,30,65,74,92,116,131,146],"task":[6,166],"of":[7,16,25,33,76,89,107,120,128,149,158,174],"automatically":[8],"aligning/detecting":[9],"bilingual":[11],"documents":[12],"that":[13,114,141],"are":[14,98],"translations":[15,119],"each":[17,37,150],"other":[18],"from":[19,95,100],"a":[20,39,51,59,80,87,126,156,172],"single":[21],"web-domain":[22],"as":[23],"part":[24],"WMT":[26],"2016.":[27],"1":[28],"Given":[29],"large":[31],"amounts":[32],"data":[34,148],"available":[35],"in":[36],"web-domain,":[38],"brute":[40],"force":[41],"approach":[42,61,85,97,113,140,170],"like":[43],"finding":[44],"similarities":[45],"between":[46],"every":[47],"possible":[48,77],"pair":[49],"is":[50],"computationally":[52],"expensive":[53],"operation.":[54],"Therefore,":[55],"start":[57],"with":[58],"simple":[60,84],"on":[62,105,130],"matching":[63],"just":[64],"web":[66,122,151],"page":[67,152],"urls":[68],"after":[69],"some":[70],"pre-processing":[71],"to":[72,79,163],"reduce":[73],"number":[75],"pairings":[78],"small":[81],"extent.":[82],"This":[83],"obtained":[86],"recall":[88,127,157,173],"50%":[90],"and":[91,124,145],"exact":[93],"matches":[94],"removed":[99],"further":[101],"consideration.":[102],"We":[103,135],"built":[104],"top":[106],"using":[109,167],"an":[110,138],"n-gram":[111,168],"based":[112,169],"uses":[115,142],"partial":[117],"English":[118],"French":[121],"pages":[123],"achieved":[125,171],"93.71%":[129],"training":[132],"pairs":[133],"provided.":[134],"also":[136],"outline":[137],"IR-based":[139],"both":[143],"content":[144],"meta":[147],"url,":[153],"thereby":[154],"obtaining":[155],"56.31%.":[159],"Our":[160],"final":[161],"submission":[162],"shared":[165],"93.92%.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
