{"id":"https://openalex.org/W7154030756","doi":"https://doi.org/10.48550/arxiv.2604.08936","title":"M-IDoL: Information Decomposition for Modality-Specific and Diverse Representation Learning in Medical Foundation Model","display_name":"M-IDoL: Information Decomposition for Modality-Specific and Diverse Representation Learning in Medical Foundation Model","publication_year":2026,"publication_date":"2026-04-10","ids":{"openalex":"https://openalex.org/W7154030756","doi":"https://doi.org/10.48550/arxiv.2604.08936"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.08936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.08936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101847419","display_name":"Yihang Liu","orcid":"https://orcid.org/0000-0003-4257-2528"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Yihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101647485","display_name":"Ying Wen","orcid":"https://orcid.org/0000-0002-6974-5110"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Longzhen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026565856","display_name":"Jiaxiong Yang","orcid":"https://orcid.org/0000-0002-7499-2474"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jiaxiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085584344","display_name":"Longzhen Yang","orcid":"https://orcid.org/0000-0002-5791-145X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Ying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043341322","display_name":"Lianghua He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Lianghua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133534015","display_name":"Heng Tao Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Heng Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101847419"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10392","display_name":"Cutaneous Melanoma Detection and Management","score":0.44589999318122864,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10392","display_name":"Cutaneous Melanoma Detection and Management","score":0.44589999318122864,"subfield":{"id":"https://openalex.org/subfields/2730","display_name":"Oncology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.1023000031709671,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.07289999723434448,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5590999722480774},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5378000140190125},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4788999855518341},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.44909998774528503},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43970000743865967},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4066999852657318},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.39820000529289246},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.3887999951839447},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.3359000086784363}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6897000074386597},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6187000274658203},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5590999722480774},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5378000140190125},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4788999855518341},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.453900009393692},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.44909998774528503},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43970000743865967},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4066999852657318},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.39820000529289246},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.3359000086784363},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3203999996185303},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C12362212","wikidata":"https://www.wikidata.org/wiki/Q728435","display_name":"Linear subspace","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2515000104904175},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.08936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.08936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.08936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.557483434677124},{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4467257857322693}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Medical":[0],"foundation":[1,126],"models":[2,127],"(MFMs)":[3],"aim":[4],"to":[5,17,40,81,101],"learn":[6],"universal":[7],"representations":[8,33,75],"from":[9,27],"multimodal":[10,32,62,74],"medical":[11,112],"images":[12],"that":[13,30,57],"can":[14],"generalize":[15],"effectively":[16],"diverse":[18,144],"downstream":[19,121],"clinical":[20,122],"tasks.":[21],"However,":[22],"most":[23],"existing":[24],"MFMs":[25],"suffer":[26],"information":[28],"ambiguity":[29],"blends":[31],"in":[34],"a":[35,54],"single":[36],"embedding":[37],"space,":[38],"leading":[39],"the":[41],"degradation":[42],"of":[43,149],"modality":[44],"specificity":[45,84],"and":[46,87,137,139,143,154],"diversity.":[47],"In":[48],"this":[49],"paper,":[50],"we":[51],"propose":[52],"M-IDoL,":[53],"self-supervised":[55],"MFM":[56],"introduces":[58],"Information":[59],"Decomposition":[60],"for":[61],"representation":[63,83,103],"Learning":[64],"via":[65],"two":[66],"objectives:":[67],"i)":[68,115],"maximizing":[69],"inter-modality":[70],"entropy":[71],"by":[72,92],"dispersing":[73],"into":[76],"separable":[77],"Mixture-of-Experts":[78],"(MoE)":[79],"subspaces":[80],"achieve":[82],"across":[85,119,152],"modalities;":[86],"ii)":[88,140],"minimizing":[89],"intra-modality":[90],"uncertainty":[91],"performing":[93],"fine-grained":[94],"semantic":[95],"discrimination":[96,157],"within":[97,158],"each":[98,159],"MoE":[99],"subspace":[100],"enrich":[102],"diversity":[104],"per":[105],"modality.":[106,160],"By":[107],"pre-training":[108],"on":[109,128],"1.15":[110],"million":[111],"images,":[113],"M-IDoL":[114],"delivers":[116],"superior":[117],"generalization":[118],"21":[120],"tasks,":[123],"outperforming":[124],"20":[125],"five":[129],"imaging":[130],"modalities":[131,153],"(e.g.,":[132],"X-ray,":[133],"fundus,":[134],"OCT,":[135],"dermoscopy":[136],"pathology),":[138],"learns":[141],"modality-specific":[142],"representations,":[145],"showing":[146],"clearer":[147],"separation":[148],"feature":[150,156],"clusters":[151],"finer-grained":[155]},"counts_by_year":[],"updated_date":"2026-05-20T06:11:20.791850","created_date":"2026-04-14T00:00:00"}
