{"id":"https://openalex.org/W2576742006","doi":"https://doi.org/10.18653/v1/w16-2368","title":"Bilingual Document Alignment with Latent Semantic Indexing","display_name":"Bilingual Document Alignment with Latent Semantic Indexing","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2576742006","doi":"https://doi.org/10.18653/v1/w16-2368","mag":"2576742006"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-2368","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2368","pdf_url":"https://www.aclweb.org/anthology/W16-2368.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Conference on Machine Translation: Volume 2,\n          Shared Task Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-2368.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027569258","display_name":"Ulrich Germann","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ulrich Germann","raw_affiliation_strings":["School of Informatics University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"School of Informatics University of Edinburgh","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5027569258"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.8834,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.85454413,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"692","last_page":"696"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8067408204078674},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.5917004346847534},{"id":"https://openalex.org/keywords/latent-semantic-analysis","display_name":"Latent semantic analysis","score":0.5341948866844177},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5229088664054871},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5086625218391418},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5058050155639648},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.4990520477294922},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4935554265975952},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.4665531814098358},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4613973796367645},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4579514265060425},{"id":"https://openalex.org/keywords/singular-value-decomposition","display_name":"Singular value decomposition","score":0.4374430477619171},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.18448898196220398},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10345324873924255}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8067408204078674},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.5917004346847534},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.5341948866844177},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5229088664054871},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5086625218391418},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5058050155639648},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.4990520477294922},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4935554265975952},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.4665531814098358},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4613973796367645},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4579514265060425},{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.4374430477619171},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.18448898196220398},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10345324873924255},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/w16-2368","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2368","pdf_url":"https://www.aclweb.org/anthology/W16-2368.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Conference on Machine Translation: Volume 2,\n          Shared Task Papers","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:publications/6ca5998a-dccf-4f0a-8cd4-168012024234","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/6ca5998a-dccf-4f0a-8cd4-168012024234","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"doi:10.18653/v1/w16-2368","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2368","pdf_url":"https://www.aclweb.org/anthology/W16-2368.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Conference on Machine Translation: Volume 2,\n          Shared Task Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7400000095367432}],"awards":[{"id":"https://openalex.org/G4956428346","display_name":null,"funder_award_id":"Horizon 2020 research and innovatio","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5036817778","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innov","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7216478277","display_name":"Scalable Understanding of Multilingual Media","funder_award_id":"688139","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7842005466","display_name":null,"funder_award_id":"Horizon 2020","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8633428685","display_name":null,"funder_award_id":"European Union's Horizon 2020 research and innovat","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G939802945","display_name":"MMT will deliver a language independent commercial online translation service based on a new open-source machine translation distributed architecture","funder_award_id":"645487","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2576742006.pdf","grobid_xml":"https://content.openalex.org/works/W2576742006.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W108738318","https://openalex.org/W1574901103","https://openalex.org/W2047295649","https://openalex.org/W2074231493","https://openalex.org/W2085177270","https://openalex.org/W2091273188","https://openalex.org/W2138787466","https://openalex.org/W2144211451","https://openalex.org/W2145080939","https://openalex.org/W2147152072","https://openalex.org/W6662462842","https://openalex.org/W6680516722","https://openalex.org/W6681116826","https://openalex.org/W6681698864"],"related_works":["https://openalex.org/W2420905679","https://openalex.org/W3000203418","https://openalex.org/W2147688565","https://openalex.org/W3152143533","https://openalex.org/W3016822073","https://openalex.org/W2135511601","https://openalex.org/W1580193008","https://openalex.org/W2108316317","https://openalex.org/W2145819623","https://openalex.org/W2136202239"],"abstract_inverted_index":{"We":[0],"apply":[1],"cross-lingual":[2],"Latent":[3],"Semantic":[4],"Indexing":[5],"to":[6,35,72],"the":[7,30,50,57,101],"Bilingual":[8],"Document":[9],"Alignment":[10],"Task":[11],"at":[12],"WMT16.":[13],"Reduced-rank":[14],"singular":[15],"value":[16],"decomposition":[17],"of":[18,46,66,76,83,90],"a":[19,40,64,81,88],"bilingual":[20],"term-document":[21],"matrix":[22],"derived":[23],"from":[24],"known":[25],"English/French":[26,77],"page":[27],"pairs":[28],"in":[29,80],"training":[31],"data":[32,96,109],"allows":[33],"us":[34],"map":[36],"monolingual":[37],"documents":[38],"into":[39,56],"joint":[41,58],"semantic":[42,59,103],"space.":[43],"Two":[44],"variants":[45],"cosine":[47],"similarity":[48,68],"between":[49,69],"vectors":[51],"that":[52],"place":[53],"each":[54],"document":[55],"space":[60],"are":[61],"combined":[62],"with":[63],"measure":[65],"string":[67],"corresponding":[70],"URLs":[71],"produce":[73],"1:1":[74],"alignments":[75],"web":[78],"pages":[79],"variety":[82],"domains.":[84],"The":[85],"system":[86],"achieves":[87],"recall":[89],"ca.":[91],"88%":[92],"if":[93,107],"no":[94],"in-domain":[95],"is":[97,110],"used":[98],"for":[99],"building":[100],"latent":[102],"model,":[104],"and":[105],"93%":[106],"such":[108],"included.":[111]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
