{"id":"https://openalex.org/W2155603234","doi":"https://doi.org/10.3115/v1/d14-1177","title":"Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora","display_name":"Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2155603234","doi":"https://doi.org/10.3115/v1/d14-1177","mag":"2155603234"},"language":"en","primary_location":{"id":"doi:10.3115/v1/d14-1177","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/d14-1177","pdf_url":"https://aclanthology.org/D14-1177.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/D14-1177.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077625160","display_name":"Georgios Kontonatsios","orcid":"https://orcid.org/0000-0001-5935-4709"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Georgios Kontonatsios","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","School of Computer Science, University of Manchester, Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"School of Computer Science, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060320466","display_name":"Ioannis Korkontzelos","orcid":"https://orcid.org/0000-0001-8052-2471"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Ioannis Korkontzelos","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","School of Computer Science, University of Manchester, Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"School of Computer Science, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112241147","display_name":"Jun\u2019ichi Tsujii","orcid":null},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jun'ichi Tsujii","raw_affiliation_strings":["National Centre for Text Mining, University of Manchester, Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Centre for Text Mining, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077976343","display_name":"Sophia Ananiadou","orcid":"https://orcid.org/0000-0002-4097-9191"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Sophia Ananiadou","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","School of Computer Science, University of Manchester, Manchester, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"School of Computer Science, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.5375,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.91409666,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1701","last_page":"1712"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7492321729660034},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.7310191988945007},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7086809873580933},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6886991262435913},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6561805605888367},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5805768370628357},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5765915513038635},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17462706565856934},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08102840185165405}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7492321729660034},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.7310191988945007},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7086809873580933},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6886991262435913},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6561805605888367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5805768370628357},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5765915513038635},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17462706565856934},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08102840185165405},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.3115/v1/d14-1177","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/d14-1177","pdf_url":"https://aclanthology.org/D14-1177.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/2ba990e2-ce90-4889-8f0c-4a24ea352421","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/2ba990e2-ce90-4889-8f0c-4a24ea352421","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Kontonatsios, G, Korkontzelos, I, Tsujii, J & Ananiadou, S 2014, Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora. in Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP). Association for Computational Linguistics, pp. 1701-1712. < http://www.aclweb.org/anthology/D14-1177 >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.669.2335","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.669.2335","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://aclweb.org/anthology/D/D14/D14-1177.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.672.716","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.672.716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://emnlp2014.org/papers/pdf/EMNLP2014177.pdf","raw_type":"text"},{"id":"pmh:oai:pure.atira.dk:publications/2ba990e2-ce90-4889-8f0c-4a24ea352421","is_oa":true,"landing_page_url":"http://www.aclweb.org/anthology/D14-1177","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kontonatsios, G, Korkontzelos, I, Tsujii, J & Ananiadou, S 2014, Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora. in Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP). Association for Computational Linguistics, pp. 1701-1712. < http://www.aclweb.org/anthology/D14-1177 >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.atira.dk:publications/3ceafbf4-b3e2-43e2-9cca-23286a9bf5c8","is_oa":true,"landing_page_url":"https://research.edgehill.ac.uk/en/publications/3ceafbf4-b3e2-43e2-9cca-23286a9bf5c8","pdf_url":null,"source":{"id":"https://openalex.org/S4306402462","display_name":"Edge Hill University Research Information Repository (Edge Hill University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165525304","host_organization_name":"Edge Hill University","host_organization_lineage":["https://openalex.org/I165525304"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Kontonatsios, G, Korkontzelos, I, Tsujii, J & Ananiadou, S 2014, 'Combining String and Context Similarity for Bilingual Term Alignment from Comparable Corpora', Paper presented at Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), Doha, Qatar, 25/10/14 - 29/10/14 pp. 1701-1712. < http://www.aclweb.org/anthology/D14-1177 >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:repository.edgehill.ac.uk:6990","is_oa":false,"landing_page_url":"http://repository.edgehill.ac.uk/6990/1/D14-1177.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4406922844","display_name":"Edge Hill University Research Archive","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"}],"best_oa_location":{"id":"doi:10.3115/v1/d14-1177","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/d14-1177","pdf_url":"https://aclanthology.org/D14-1177.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.800000011920929}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2155603234.pdf","grobid_xml":"https://content.openalex.org/works/W2155603234.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W38126138","https://openalex.org/W72471529","https://openalex.org/W116705248","https://openalex.org/W189286601","https://openalex.org/W204594949","https://openalex.org/W635530177","https://openalex.org/W1480376833","https://openalex.org/W1480519300","https://openalex.org/W1505048671","https://openalex.org/W1532624895","https://openalex.org/W1576928919","https://openalex.org/W1582850894","https://openalex.org/W1693107767","https://openalex.org/W2003864163","https://openalex.org/W2008248260","https://openalex.org/W2011248508","https://openalex.org/W2023045767","https://openalex.org/W2040759639","https://openalex.org/W2041232209","https://openalex.org/W2054890041","https://openalex.org/W2072976288","https://openalex.org/W2075728986","https://openalex.org/W2100693535","https://openalex.org/W2102749417","https://openalex.org/W2116780029","https://openalex.org/W2118585731","https://openalex.org/W2118606687","https://openalex.org/W2124807415","https://openalex.org/W2133990480","https://openalex.org/W2153635508","https://openalex.org/W2156985047","https://openalex.org/W2159583324","https://openalex.org/W2162967019","https://openalex.org/W2167265720","https://openalex.org/W2250229103","https://openalex.org/W2250863245","https://openalex.org/W2251853682","https://openalex.org/W2587282545","https://openalex.org/W2595715041","https://openalex.org/W2787894218","https://openalex.org/W2882319491","https://openalex.org/W2911964244","https://openalex.org/W2986345682","https://openalex.org/W3120421331","https://openalex.org/W4206885513","https://openalex.org/W4253938478","https://openalex.org/W4302083825","https://openalex.org/W4302780227"],"related_works":["https://openalex.org/W42295635","https://openalex.org/W1973996291","https://openalex.org/W2330575325","https://openalex.org/W4388747848","https://openalex.org/W2163803519","https://openalex.org/W2497592525","https://openalex.org/W3096145648","https://openalex.org/W3197510923","https://openalex.org/W2370579019","https://openalex.org/W4313265328"],"abstract_inverted_index":{"Automatically":[0],"compiling":[1],"bilingual":[2],"dictionaries":[3],"of":[4,78,98,111,114,120],"technical":[5],"terms":[6,31],"from":[7],"comparable":[8],"corpora":[9,110],"is":[10],"a":[11,43,62,68,74,138],"challenging":[12],"problem,":[13],"yet":[14],"with":[15],"many":[16],"potential":[17],"applications.":[18],"In":[19],"this":[20],"paper,":[21],"we":[22,60,86,94,126,142],"exploit":[23],"two":[24,129,148],"independent":[25],"observations":[26],"about":[27],"term":[28,44,79],"translations:":[29],"(a)":[30,104],"are":[32],"often":[33],"formed":[34],"by":[35],"corresponding":[36],"sub-lexical":[37],"units":[38],"across":[39],"languages":[40],"and":[41,45,100,106,116,122,134,141],"(b)":[42,109],"its":[46],"translation":[47,119,130,149],"tend":[48],"to":[49,82],"appear":[50],"in":[51,137],"similar":[52,105],"lexical":[53],"context.":[54],"Based":[55],"on":[56],"the":[57,83,96,118,128,147],"first":[58],"observation,":[59,85],"develop":[61],"new":[63],"character":[64],"n-gram":[65],"compositional":[66,99],"method,":[67],"logistic":[69],"regression":[70],"classifier,":[71],"for":[72],"learning":[73],"string":[75,133],"similarity":[76],"measure":[77],"translations.":[80],"According":[81],"second":[84],"use":[87],"an":[88],"existing":[89],"context-based":[90,101],"approach.":[91],"For":[92],"evaluation,":[93],"investigate":[95],"performance":[97],"methods":[102],"on:":[103],"unrelated":[107],"languages,":[108],"different":[112],"degree":[113],"comparability":[115],"(c)":[117],"frequent":[121],"rare":[123],"terms.":[124],"Finally,":[125],"combine":[127],"clues,":[131],"namely":[132],"contextual":[135],"similarity,":[136],"linear":[139],"model":[140],"show":[143],"substantial":[144],"improvements":[145],"over":[146],"signals.":[150]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
