{"id":"https://openalex.org/W7139067247","doi":"https://doi.org/10.48550/arxiv.2603.16606","title":"Omnilingual SONAR: Cross-Lingual and Cross-Modal Sentence Embeddings Bridging Massively Multilingual Text and Speech","display_name":"Omnilingual SONAR: Cross-Lingual and Cross-Modal Sentence Embeddings Bridging Massively Multilingual Text and Speech","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7139067247","doi":"https://doi.org/10.48550/arxiv.2603.16606"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16606","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16606","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16606","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130145384","display_name":"Omnilingual SONAR Team","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Omnilingual SONAR Team","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037739770","display_name":"Jo\u00e3o Maria Janeiro","orcid":"https://orcid.org/0009-0005-2092-7860"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Janeiro, Jo\u00e3o Maria","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115978593","display_name":"Pere-Lu\u00eds Huguet Cabot","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cabot, Pere-Llu\u00eds Huguet","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076257265","display_name":"Ioannis Tsiamas","orcid":"https://orcid.org/0000-0003-1049-2515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsiamas, Ioannis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062573067","display_name":"Yen Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Yen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073712726","display_name":"Vivek Iyer","orcid":"https://orcid.org/0000-0001-6441-9319"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iyer, Vivek","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059260639","display_name":"Guillem Ram\u00edrez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ram\u00edrez, Guillem","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124579775","display_name":"Lo\u00efc Barrault","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barrault, Loic","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129933960","display_name":"Belen Alastruey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alastruey, Belen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008107321","display_name":"Yu-An Chung","orcid":"https://orcid.org/0000-0001-9451-7956"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chung, Yu-An","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130202840","display_name":"Marta R. Costa-juss\u00e0","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Costa-Jussa, Marta R.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020407806","display_name":"David Dale","orcid":"https://orcid.org/0000-0003-2045-6833"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dale, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130077039","display_name":"Kevin Heffernan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heffernan, Kevin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129757788","display_name":"Jaehyeong Jo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jo, Jaehyeong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112988007","display_name":"Artyom Kozhevnikov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kozhevnikov, Artyom","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130051362","display_name":"Alexandre Mourachko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mourachko, Alexandre","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030185276","display_name":"Christophe Ropers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ropers, Christophe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015857371","display_name":"Holger Schwenk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schwenk, Holger","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130148872","display_name":"Paul-Ambroise Duquenne","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duquenne, Paul-Ambroise","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":19,"corresponding_author_ids":["https://openalex.org/A5130145384"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.2515999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.2515999972820282,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.21559999883174896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.16419999301433563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.7407000064849854},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6092000007629395},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5997999906539917},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5965999960899353},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.44279998540878296},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.38920000195503235},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.36559998989105225},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.3319999873638153}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.822700023651123},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.7407000064849854},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6572999954223633},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6092000007629395},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5997999906539917},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5871000289916992},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.44279998540878296},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.38920000195503235},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.3319999873638153},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.328900009393692},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.32739999890327454},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3197000026702881},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2987000048160553},{"id":"https://openalex.org/C2778243841","wikidata":"https://www.wikidata.org/wiki/Q56467","display_name":"Modern Standard Arabic","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.2687999904155731},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.2612999975681305}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16606","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16606","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16606","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16606","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6300646066665649,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Cross-lingual":[0],"sentence":[1,32],"encoders":[2],"typically":[3],"cover":[4],"only":[5,223],"a":[6,24,45,81,96,117,155,205],"few":[7],"hundred":[8],"languages":[9,87,137,188,249],"and":[10,30,41,101,151,174,199,210,250],"often":[11],"trade":[12],"downstream":[13,52,254],"quality":[14],"for":[15,85,220,252],"stronger":[16],"alignment,":[17],"limiting":[18],"their":[19],"adoption.":[20],"We":[21,78],"introduce":[22],"OmniSONAR,":[23],"new":[25],"family":[26],"of":[27,57,59,129,157,213,248],"omnilingual,":[28],"cross-lingual":[29,142],"cross-modal":[31,127],"embedding":[33,240],"models":[34,177],"that":[35],"natively":[36],"embed":[37],"text,":[38],"speech,":[39,202],"code,":[40],"mathematical":[42],"expressions":[43],"in":[44],"single":[46],"semantic":[47],"space,":[48],"while":[49],"delivering":[50],"state-of-the-art":[51],"performance":[53],"at":[54],"the":[55,126,147,160],"scale":[56,70],"thousands":[58,113,247],"languages,":[60],"from":[61],"high-resource":[62],"to":[63,111,246],"extremely":[64],"low-resource":[65],"varieties.":[66],"To":[67],"reach":[68],"this":[69,107,130],"without":[71],"representation":[72],"collapse,":[73],"we":[74,109,124,242],"use":[75],"progressive":[76],"training.":[77],"first":[79],"learn":[80],"strong":[82,167],"foundational":[83],"space":[84,131],"200":[86],"with":[88,95],"an":[89,230],"LLM-initialized":[90],"encoder-decoder,":[91],"combining":[92],"token-level":[93],"decoding":[94],"novel":[97],"split-softmax":[98],"contrastive":[99],"loss":[100],"synthetic":[102],"hard":[103],"negatives.":[104],"Building":[105],"on":[106,146,159,171,186,197,224,235],"foundation,":[108],"expand":[110],"several":[112],"language":[114],"varieties":[115],"via":[116],"two-stage":[118],"teacher-student":[119],"encoder":[120],"distillation":[121],"framework.":[122],"Finally,":[123,227],"demonstrate":[125],"extensibility":[128],"by":[132,154,182,228],"seamlessly":[133],"mapping":[134],"177":[135],"spoken":[136],"into":[138,189],"it.":[139],"OmniSONAR":[140,193,203,239],"halves":[141],"similarity":[143],"search":[144],"error":[145,153,209],"200-language":[148],"FLORES":[149],"dataset":[150],"reduces":[152],"factor":[156],"15":[158,183],"1,560-language":[161],"BIBLE":[162,191],"benchmark.":[163],"It":[164],"also":[165,194],"enables":[166],"translation,":[168],"outperforming":[169],"NLLB-3B":[170],"multilingual":[172],"benchmarks":[173],"exceeding":[175],"prior":[176],"(including":[178],"much":[179],"larger":[180],"LLMs)":[181],"chrF++":[184],"points":[185],"1,560":[187],"English":[190,236],"translation.":[192],"performs":[195],"strongly":[196],"MTEB":[198],"XLCoST.":[200],"For":[201],"achieves":[204],"43%":[206],"lower":[207],"similarity-search":[208],"reaches":[211],"97%":[212],"SeamlessM4T":[214],"speech-to-text":[215],"quality,":[216],"despite":[217],"being":[218],"zero-shot":[219],"translation":[221],"(trained":[222],"ASR":[225],"data).":[226],"training":[229],"encoder-decoder":[231],"LM,":[232],"Spectrum,":[233],"exclusively":[234],"text":[237],"processing":[238],"sequences,":[241],"unlock":[243],"high-performance":[244],"transfer":[245],"speech":[251],"complex":[253],"tasks.":[255]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-20T00:00:00"}
