{"id":"https://openalex.org/W7134235616","doi":"https://doi.org/10.48550/arxiv.2603.05528","title":"Omni-C: Compressing Heterogeneous Modalities into a Single Dense Encoder","display_name":"Omni-C: Compressing Heterogeneous Modalities into a Single Dense Encoder","publication_year":2026,"publication_date":"2026-02-27","ids":{"openalex":"https://openalex.org/W7134235616","doi":"https://doi.org/10.48550/arxiv.2603.05528"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05528","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036176395","display_name":"Kin Wai Lau","orcid":"https://orcid.org/0000-0001-5364-5070"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lau, Kin Wai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008554311","display_name":"Yasar Abbas Ur Rehman","orcid":"https://orcid.org/0000-0002-2945-7181"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rehman, Yasar Abbas Ur","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038133707","display_name":"Lai-Man Po","orcid":"https://orcid.org/0000-0002-5185-1492"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Po, Lai-Man","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128502487","display_name":"Pedro Porto Buarque de Gusm\u00e3o","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"de Gusm\u00e3o, Pedro Porto Buarque","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036176395"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3968999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.3968999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3154999911785126,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.03840000182390213,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.720300018787384},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6448000073432922},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.579200029373169},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.545799970626831},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.536899983882904},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.39089998602867126},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.3650999963283539},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.3433000147342682},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.34060001373291016}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8216999769210815},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.720300018787384},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6448000073432922},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.579200029373169},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.545799970626831},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.536899983882904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4302999973297119},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.36550000309944153},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35179999470710754},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3433000147342682},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3294000029563904},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C158207573","wikidata":"https://www.wikidata.org/wiki/Q5747224","display_name":"Heterogeneous network","level":4,"score":0.30399999022483826},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2802000045776367},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26570001244544983},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2639999985694885},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.25850000977516174},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.25130000710487366},{"id":"https://openalex.org/C2779623668","wikidata":"https://www.wikidata.org/wiki/Q7652842","display_name":"SwIPe","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05528","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05528","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05528","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05528","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"multimodal":[1,174],"systems":[2,106],"often":[3],"rely":[4],"on":[5,70,104,142],"separate":[6],"expert":[7,119,131],"modality":[8,109],"encoders":[9],"which":[10],"cause":[11],"linearly":[12],"scaling":[13],"complexity":[14],"and":[15,33,40,65,81,111,135,144,172],"computational":[16],"overhead":[17],"with":[18,30,138],"added":[19],"modalities.":[20],"While":[21],"unified":[22,159],"Omni-models":[23],"address":[24],"this":[25,45],"via":[26,107],"Mixture-of-Expert":[27],"(MoE)":[28],"architectures":[29],"specialized":[31,122],"experts":[32],"routing,":[34],"they":[35],"still":[36],"inflate":[37],"parameter":[38,76,155],"counts":[39],"introduce":[41],"routing":[42],"overhead.":[43],"In":[44],"paper,":[46],"we":[47],"propose":[48],"Omni-C":[49,87,126],"(Omni-Compress),":[50],"a":[51],"single":[52],"dense":[53],"Transformer-based":[54],"encoder":[55],"that":[56,146],"learns":[57],"competitive":[58],"shared":[59],"representations":[60],"across":[61],"heterogeneous":[62],"modalities--images,":[63],"audio,":[64],"text--through":[66],"unimodal":[67,134],"contrastive":[68],"pretraining":[69],"large-scale":[71],"unaligned":[72],"data.":[73],"By":[74],"maximizing":[75],"sharing":[77],"in":[78,133],"the":[79,115],"backbone":[80],"using":[82],"lightweight":[83,151],"modality-specific":[84],"projection":[85],"heads,":[86],"effectively":[88],"mitigates":[89],"inter-modality":[90],"conflicts":[91],"without":[92],"requiring":[93],"MoE,":[94],"paired":[95],"supervision,":[96],"or":[97,121,154],"routing.":[98],"This":[99],"design":[100],"supports":[101],"efficient":[102,156,171],"deployment":[103],"memory-constrained":[105],"sequential":[108],"processing":[110],"low-memory":[112],"inference,":[113],"eliminating":[114],"need":[116],"for":[117],"parallel":[118],"loading":[120],"hardware.":[123],"Experiments":[124],"show":[125],"achieves":[127],"performance":[128],"comparable":[129],"to":[130,167],"models":[132],"cross-model":[136],"tasks,":[137],"modest":[139],"zero-shot":[140],"degradation":[141],"audio":[143],"text":[145],"is":[147],"largely":[148],"recovered":[149],"through":[150],"linear":[152],"probing":[153],"fine-tuning.":[157],"The":[158],"architecture":[160],"substantially":[161],"reduces":[162],"inference":[163],"memory":[164],"usage":[165],"compared":[166],"multi-encoder":[168],"baselines,":[169],"advancing":[170],"scalable":[173],"learning.":[175]},"counts_by_year":[],"updated_date":"2026-05-03T08:25:01.440150","created_date":"2026-03-10T00:00:00"}
