{"id":"https://openalex.org/W2292999442","doi":"https://doi.org/10.3115/v1/n15-1043","title":"Latent Domain Word Alignment for Heterogeneous Corpora","display_name":"Latent Domain Word Alignment for Heterogeneous Corpora","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W2292999442","doi":"https://doi.org/10.3115/v1/n15-1043","mag":"2292999442"},"language":"en","primary_location":{"id":"doi:10.3115/v1/n15-1043","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/n15-1043","pdf_url":"https://www.aclweb.org/anthology/N15-1043.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/N15-1043.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109596170","display_name":"Hoang Manh Cuong","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Hoang Cuong","raw_affiliation_strings":["Institute for Logic, Language and Computation University of Amsterdam Science Park 107, 1098 XG Amsterdam, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Logic, Language and Computation University of Amsterdam Science Park 107, 1098 XG Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034829413","display_name":"Khalil Sima\u2019an","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Khalil Sima'an","raw_affiliation_strings":["Institute for Logic, Language and Computation University of Amsterdam Science Park 107, 1098 XG Amsterdam, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Logic, Language and Computation University of Amsterdam Science Park 107, 1098 XG Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034829413"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":2.2257,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.90908967,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"398","last_page":"408"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7803381681442261},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7794118523597717},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.726288914680481},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6643806099891663},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6234731078147888},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6197618842124939},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6005242466926575},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14416900277137756}],"concepts":[{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7803381681442261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7794118523597717},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.726288914680481},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6643806099891663},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6234731078147888},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6197618842124939},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6005242466926575},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14416900277137756},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3115/v1/n15-1043","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/n15-1043","pdf_url":"https://www.aclweb.org/anthology/N15-1043.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/c5081bd1-f195-460d-8180-3c0e7780ed41","is_oa":false,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/latent-domain-word-alignment-for-heterogeneous-corpora(c5081bd1-f195-460d-8180-3c0e7780ed41).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Cuong, H & Simaan, K 2015, Latent Domain Word Alignment for Heterogeneous Corpora. in R Mihalcea, J Chai & A Sarkar (eds), NAACL HLT 2015: The 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies : Proceedings of the Conference : May 31-June 5, 2015, Denver, Colorado, USA. Stroudsburg, PA, pp. 398-408, Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL HLT 2015, Denver, United States, 31/05/15. < http://aclweb.org/anthology/N/N15/N15-1043.pdf >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:dare.uva.nl:publications/c5081bd1-f195-460d-8180-3c0e7780ed41","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/latent-domain-word-alignment-for-heterogeneous-corpora(c5081bd1-f195-460d-8180-3c0e7780ed41).html","pdf_url":"http://aclweb.org/anthology/N/N15/N15-1043.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Cuong, H & Simaan, K 2015, Latent Domain Word Alignment for Heterogeneous Corpora. in R Mihalcea, J Chai & A Sarkar (eds), NAACL HLT 2015: The 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies : Proceedings of the Conference : May 31-June 5, 2015, Denver, Colorado, USA. Stroudsburg, PA, pp. 398-408, Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL HLT 2015, Denver, United States, 31/05/15. < http://aclweb.org/anthology/N/N15/N15-1043.pdf >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:dare.uva.nl:publications/c5081bd1-f195-460d-8180-3c0e7780ed41","is_oa":false,"landing_page_url":"http://dare.uva.nl/personal/pure/en/publications/latent-domain-word-alignment-for-heterogeneous-corpora(c5081bd1-f195-460d-8180-3c0e7780ed41).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"doi:10.3115/v1/n15-1043","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/n15-1043","pdf_url":"https://www.aclweb.org/anthology/N15-1043.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4399999976158142}],"awards":[{"id":"https://openalex.org/G3981126665","display_name":null,"funder_award_id":"Seventh Framework Programme","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G4423458288","display_name":null,"funder_award_id":"277-89-002","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G629491556","display_name":null,"funder_award_id":"(NWO)","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2292999442.pdf","grobid_xml":"https://content.openalex.org/works/W2292999442.grobid-xml"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W23077562","https://openalex.org/W92833821","https://openalex.org/W169216084","https://openalex.org/W1746680969","https://openalex.org/W1848260265","https://openalex.org/W2004447574","https://openalex.org/W2006969979","https://openalex.org/W2038698865","https://openalex.org/W2080373976","https://openalex.org/W2095755718","https://openalex.org/W2101105183","https://openalex.org/W2116229791","https://openalex.org/W2116492146","https://openalex.org/W2117045850","https://openalex.org/W2124807415","https://openalex.org/W2133622676","https://openalex.org/W2140702357","https://openalex.org/W2142669279","https://openalex.org/W2144600658","https://openalex.org/W2145110208","https://openalex.org/W2145350077","https://openalex.org/W2149327368","https://openalex.org/W2149816711","https://openalex.org/W2151594415","https://openalex.org/W2153653739","https://openalex.org/W2154368244","https://openalex.org/W2154632340","https://openalex.org/W2156985047","https://openalex.org/W2159755860","https://openalex.org/W2160131015","https://openalex.org/W2163561955","https://openalex.org/W2169724380","https://openalex.org/W2181672650","https://openalex.org/W2250307842","https://openalex.org/W2250545560","https://openalex.org/W2251004829","https://openalex.org/W2251590347","https://openalex.org/W2252217186","https://openalex.org/W2257408573","https://openalex.org/W2270190199","https://openalex.org/W2350508517","https://openalex.org/W2595715041","https://openalex.org/W3010865323","https://openalex.org/W4241645538"],"related_works":["https://openalex.org/W2375873920","https://openalex.org/W2146114872","https://openalex.org/W2392060890","https://openalex.org/W2392760275","https://openalex.org/W2083530853","https://openalex.org/W2982905616","https://openalex.org/W2009831055","https://openalex.org/W2393172683","https://openalex.org/W3211744874","https://openalex.org/W2369835347"],"abstract_inverted_index":{"This":[0],"work":[1],"focuses":[2],"on":[3,19,45],"the":[4,43,79],"insensitivity":[5],"of":[6,56,98],"existing":[7],"word":[8,27,69,91],"alignment":[9,28,37,70,92],"models":[10],"to":[11,41],"domain":[12,26],"differences,":[13],"which":[14,32],"often":[15],"yields":[16],"suboptimal":[17],"results":[18],"large":[20],"heterogeneous":[21,47],"data.":[22],"A":[23],"novel":[24],"latent":[25],"model":[29,44],"is":[30],"proposed,":[31],"induces":[33],"domain-conditioned":[34,68,81],"lexical":[35],"and":[36,94],"statistics.":[38],"We":[39],"propose":[40],"train":[42],"a":[46,53],"corpus":[48],"under":[49],"partial":[50],"supervision,":[51],"using":[52],"small":[54],"number":[55],"seed":[57,63],"samples":[58,64],"from":[59],"different":[60],"domains.":[61],"The":[62],"allow":[65],"estimating":[66],"sharper,":[67],"statistics":[71],"for":[72],"sentence":[73],"pairs.":[74],"Our":[75],"experiments":[76],"show":[77],"that":[78],"derived":[80],"statistics,":[82],"once":[83],"combined":[84],"together,":[85],"produce":[86],"notable":[87],"improvements":[88],"both":[89],"in":[90,95],"accuracy":[93,97],"translation":[96],"their":[99],"resulting":[100],"SMT":[101],"systems.":[102]},"counts_by_year":[{"year":2019,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
