{"id":"https://openalex.org/W2907069963","doi":"https://doi.org/10.1145/3289600.3291023","title":"Crosslingual Document Embedding as Reduced-Rank Ridge Regression","display_name":"Crosslingual Document Embedding as Reduced-Rank Ridge Regression","publication_year":2019,"publication_date":"2019-01-30","ids":{"openalex":"https://openalex.org/W2907069963","doi":"https://doi.org/10.1145/3289600.3291023","mag":"2907069963"},"language":"en","primary_location":{"id":"doi:10.1145/3289600.3291023","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3289600.3291023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1904.03922","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057031908","display_name":"Martin Josifoski","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Martin Josifoski","raw_affiliation_strings":["EPFL, Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"EPFL, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017286800","display_name":"Ivan Paskov","orcid":"https://orcid.org/0000-0002-5161-1771"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ivan S. Paskov","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025508471","display_name":"Hristo S. Paskov","orcid":null},"institutions":[{"id":"https://openalex.org/I55884533","display_name":"BlackRock (United States)","ror":"https://ror.org/031dc4703","country_code":"US","type":"company","lineage":["https://openalex.org/I55884533"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hristo S. Paskov","raw_affiliation_strings":["BlackRock, Palo Alto, CA, USA"],"affiliations":[{"raw_affiliation_string":"BlackRock, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I55884533"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073756389","display_name":"Martin Jaggi","orcid":"https://orcid.org/0000-0003-1579-5558"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Martin Jaggi","raw_affiliation_strings":["EPFL, Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"EPFL, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101446790","display_name":"Robert West","orcid":"https://orcid.org/0000-0002-3984-1232"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Robert West","raw_affiliation_strings":["EPFL, Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"EPFL, Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5057031908"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":1.876,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.89145962,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"744","last_page":"752"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8017240762710571},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.7079600691795349},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.648527204990387},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6108076572418213},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5945650339126587},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5811273455619812},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.49061957001686096},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4798673987388611},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4347935914993286},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.16145727038383484},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11801588535308838}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8017240762710571},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.7079600691795349},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.648527204990387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6108076572418213},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5945650339126587},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5811273455619812},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.49061957001686096},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4798673987388611},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4347935914993286},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.16145727038383484},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11801588535308838},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3289600.3291023","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3289600.3291023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1904.03922","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.03922","pdf_url":"https://arxiv.org/pdf/1904.03922","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:infoscience.epfl.ch:263893","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/263893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"},{"id":"pmh:oai:zenodo.org:2597441","is_oa":true,"landing_page_url":"https://zenodo.org/record/2597441","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"mag:3160728629","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002245842517867","pdf_url":null,"source":{"id":"https://openalex.org/S4306500161","display_name":"ACM Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"ACM Proceedings","raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1904.03922","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1904.03922","pdf_url":"https://arxiv.org/pdf/1904.03922","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.8399999737739563,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W58646613","https://openalex.org/W175058090","https://openalex.org/W342285082","https://openalex.org/W1480376833","https://openalex.org/W1542713999","https://openalex.org/W1666605052","https://openalex.org/W1790609997","https://openalex.org/W1828724394","https://openalex.org/W1972676371","https://openalex.org/W2028776121","https://openalex.org/W2044157185","https://openalex.org/W2056392803","https://openalex.org/W2068297964","https://openalex.org/W2116444583","https://openalex.org/W2126725946","https://openalex.org/W2142074148","https://openalex.org/W2153579005","https://openalex.org/W2157086813","https://openalex.org/W2157133710","https://openalex.org/W2157791002","https://openalex.org/W2250193220","https://openalex.org/W2250539671","https://openalex.org/W2250646737","https://openalex.org/W2251033195","https://openalex.org/W2251804196","https://openalex.org/W2270364989","https://openalex.org/W2282078507","https://openalex.org/W2294774419","https://openalex.org/W2426917359","https://openalex.org/W2494423583","https://openalex.org/W2558395983","https://openalex.org/W2561995736","https://openalex.org/W2739533179","https://openalex.org/W2740011246","https://openalex.org/W2741602058","https://openalex.org/W2762484717","https://openalex.org/W2797253696","https://openalex.org/W2949402715","https://openalex.org/W2950133940","https://openalex.org/W2952037945","https://openalex.org/W2952190837","https://openalex.org/W2952381430","https://openalex.org/W2952405450","https://openalex.org/W2963061446","https://openalex.org/W2963617771","https://openalex.org/W3004533406","https://openalex.org/W3010805239","https://openalex.org/W3102374157","https://openalex.org/W3104120087","https://openalex.org/W4213169074","https://openalex.org/W4233372644","https://openalex.org/W4294170691","https://openalex.org/W4299579390"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W2375873920","https://openalex.org/W2183306018","https://openalex.org/W2549990292","https://openalex.org/W2345479200","https://openalex.org/W2146114872","https://openalex.org/W2392060890","https://openalex.org/W2911655849","https://openalex.org/W4286432911","https://openalex.org/W3134737443"],"abstract_inverted_index":{"There":[0],"has":[1],"recently":[2],"been":[3],"much":[4],"interest":[5],"in":[6,41,64,94],"extending":[7],"vector-based":[8],"word":[9,145,156,224],"representations":[10],"to":[11,31,96,115,124,132,137,149,158],"multiple":[12,65],"languages,":[13],"such":[14,73],"that":[15,89,100,194],"words":[16,30],"can":[17,121],"be":[18,116,122],"compared":[19],"across":[20],"languages.":[21],"In":[22],"this":[23],"paper,":[24],"we":[25],"shift":[26],"the":[27,59,98,111,126,180],"focus":[28],"from":[29,129],"documents":[32,39],"and":[33,153,166,213,223],"introduce":[34],"a":[35,45,55,86,101,201],"method":[36,196],"for":[37,210],"embedding":[38,211],"written":[40],"any":[42],"language":[43],"into":[44],"single,":[46],"language-independent":[47,133],"vector":[48],"space.":[49],"For":[50],"training,":[51],"our":[52,177,195],"approach":[53],"leverages":[54],"multilingual":[56],"corpus":[57],"where":[58],"same":[60],"concept":[61,99],"is":[62,104,163,167,189],"covered":[63],"languages":[66],"(but":[67],"not":[68,208],"necessarily":[69],"via":[70],"exact":[71],"translations),":[72],"as":[74,171,173,184],"Wikipedia.":[75],"Our":[76],"method,":[77],"Cr5":[78,162],"(Crosslingual":[79],"reduced-rank":[80],"ridge":[81],"regression),":[82],"starts":[83],"by":[84],"training":[85],"ridge-regression-based":[87],"classifier":[88],"uses":[90,179],"language-specific":[91,130],"bag-of-word":[92],"features":[93],"order":[95],"predict":[97],"given":[102],"document":[103,160,203],"about.":[105],"We":[106],"show":[107,193],"that,":[108],"when":[109],"constraining":[110],"learned":[112],"weight":[113],"matrix":[114],"of":[117],"low":[118],"rank,":[119],"it":[120,188,215],"factored":[123],"obtain":[125,159],"desired":[127],"mappings":[128],"bags-of-words":[131],"embeddings.":[134],"As":[135],"opposed":[136],"most":[138],"prior":[139],"methods,":[140],"which":[141],"use":[142],"pretrained":[143],"monolingual":[144],"vectors,":[146,161],"postprocess":[147],"them":[148,151],"make":[150],"crosslingual,":[152],"finally":[154],"average":[155],"vectors":[157],"trained":[164,209],"end-to-end":[165],"thus":[168],"natively":[169],"crosslingual":[170,202,221],"well":[172],"document-level.":[174],"Moreover,":[175],"since":[176],"algorithm":[178],"singular":[181],"value":[182],"decomposition":[183],"its":[185],"core":[186],"operation,":[187],"highly":[190],"scalable.":[191],"Experiments":[192],"achieves":[197,217],"state-of-the-art":[198],"performance":[199,219],"on":[200,220],"retrieval":[204,225],"task.":[205],"Finally,":[206],"although":[207],"sentences":[212],"words,":[214],"also":[216],"competitive":[218],"sentence":[222],"tasks.":[226]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
