{"id":"https://openalex.org/W4403574001","doi":"https://doi.org/10.1007/s00354-026-00321-z","title":"Enhancing Unimodal Latent Representations in Multimodal VAEs Through Iterative Amortized Inference","display_name":"Enhancing Unimodal Latent Representations in Multimodal VAEs Through Iterative Amortized Inference","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W4403574001","doi":"https://doi.org/10.1007/s00354-026-00321-z"},"language":"en","primary_location":{"id":"doi:10.1007/s00354-026-00321-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00354-026-00321-z","pdf_url":null,"source":{"id":"https://openalex.org/S165364243","display_name":"New Generation Computing","issn_l":"0288-3635","issn":["0288-3635","1882-7055"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"New Generation Computing","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s00354-026-00321-z","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114135699","display_name":"Yuta Oshima","orcid":"https://orcid.org/0009-0006-6016-3866"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuta Oshima","raw_affiliation_strings":[],"raw_orcid":"https://orcid.org/0009-0006-6016-3866","affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038802330","display_name":"Masahiro Suzuki","orcid":"https://orcid.org/0000-0001-8519-5617"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Masahiro Suzuki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5090592819","display_name":"Yutaka Matsuo","orcid":"https://orcid.org/0000-0001-9084-9670"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yutaka Matsuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":2490,"currency":"EUR","value_usd":3090},"apc_paid":{"value":2490,"currency":"EUR","value_usd":3090},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0027985,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"44","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.965399980545044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7549921274185181},{"id":"https://openalex.org/keywords/amortized-analysis","display_name":"Amortized analysis","score":0.5967974066734314},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4655628204345703},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42324894666671753},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3356882929801941},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12432911992073059},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.09420177340507507}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7549921274185181},{"id":"https://openalex.org/C142417499","wikidata":"https://www.wikidata.org/wiki/Q331716","display_name":"Amortized analysis","level":3,"score":0.5967974066734314},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4655628204345703},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42324894666671753},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3356882929801941},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12432911992073059},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.09420177340507507}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s00354-026-00321-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00354-026-00321-z","pdf_url":null,"source":{"id":"https://openalex.org/S165364243","display_name":"New Generation Computing","issn_l":"0288-3635","issn":["0288-3635","1882-7055"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"New Generation Computing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2410.11403","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.11403","pdf_url":"https://arxiv.org/pdf/2410.11403","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.11403","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.11403","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s00354-026-00321-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00354-026-00321-z","pdf_url":null,"source":{"id":"https://openalex.org/S165364243","display_name":"New Generation Computing","issn_l":"0288-3635","issn":["0288-3635","1882-7055"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"New Generation Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6727313248","display_name":null,"funder_award_id":"J23H04974","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4391375266","https://openalex.org/W1979597421","https://openalex.org/W2007980826","https://openalex.org/W2061531152","https://openalex.org/W3002753104","https://openalex.org/W2077600819","https://openalex.org/W2142036596","https://openalex.org/W2141466631"],"abstract_inverted_index":{"Abstract":[0],"In":[1],"recent":[2],"years,":[3],"deep":[4],"generative":[5],"models":[6,109,119,137,160,185,248],"for":[7,46,102],"multimodal":[8,16,47,61,129,153,163,176,219,223,231,267,276],"data":[9],"have":[10,20],"gained":[11],"significant":[12],"attention.":[13],"Among":[14],"these,":[15],"variational":[17],"autoencoders":[18],"(VAEs)":[19],"emerged":[21],"as":[22,116,120,325],"a":[23,29,60,103,162],"promising":[24],"approach,":[25],"aiming":[26],"to":[27,92,127,147,156,243,263],"capture":[28,319],"shared":[30],"latent":[31],"representation":[32],"by":[33,113,166,258,305,327],"integrating":[34],"information":[35,140,240,277],"across":[36],"different":[37,79],"modalities":[38,57,133,245],"through":[39],"their":[40],"inference":[41,62,80,118,149,159,164,198,215,232,262,284,302],"models.":[42,256],"A":[43],"primary":[44],"challenge":[45,112],"VAEs":[48,154],"is":[49,88,100,180],"accurately":[50],"inferring":[51],"representations":[52,317,347],"from":[53,139,188,193,348],"arbitrary":[54],"subsets":[55],"of":[56,90,97,106,143,203,300,322],"after":[58],"learning":[59],"model.":[63],"Naively,":[64],"this":[65,111,236,265],"would":[66],"require":[67],"training":[68],"$$":[69,71],"2^M":[70],"<mml:math":[72,84],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">":[73,85],"<mml:msup>":[74],"<mml:mn>2</mml:mn>":[75],"<mml:mi>M</mml:mi>":[76,86],"</mml:msup>":[77],"</mml:math>":[78,87],"networks":[81],"(":[82],"$$M$$":[83],"#":[89],"modalities)":[91],"handle":[93],"every":[94],"possible":[95],"combination":[96],"modalities,":[98,123,235,324],"which":[99,179],"infeasible":[101],"large":[104],"number":[105],"modalities.":[107],"Mixture-based":[108],"address":[110,207],"requiring":[114,279],"only":[115,280],"many":[117],"there":[121],"are":[122,134],"aggregating":[124],"unimodal":[125,158,204,261,271,281,349],"inferences":[126,272],"perform":[128],"inference.":[130,205,226],"However,":[131],"when":[132],"missing,":[135],"these":[136,184],"suffer":[138],"loss,":[141],"particularly":[142],"modality-specific":[144],"information,":[145],"leading":[146],"deteriorated":[148],"performance.":[150],"Alternatively,":[151],"alignment-based":[152,255],"aim":[155],"align":[157],"with":[161],"model":[165],"minimizing":[167],"the":[168,175,196,201,218,230,239,251,260,294,298,301,315,320,340,345],"Kullback\u2013Leibler":[169],"(KL)":[170],"divergence":[171],"between":[172],"them.":[173],"Yet,":[174],"amortized":[177,214,225],"inference,":[178],"alignment":[181],"source":[182],"in":[183,246,254,334],"inherently":[186],"suffers":[187],"amortization":[189,252],"gaps,":[190],"preventing":[191],"it":[192],"perfectly":[194],"approximating":[195],"true":[197],"and":[199,249,310,313],"compromising":[200],"accuracy":[202,309],"To":[206],"both":[208],"issues,":[209],"we":[210,269],"introduce":[211],"an":[212],"iterative":[213,224],"mechanism":[216],"within":[217],"VAE":[220],"framework,":[221],"termed":[222],"By":[227],"iteratively":[228],"refining":[229],"using":[233],"all":[234],"method":[237,296],"overcomes":[238],"loss":[241],"due":[242],"missing":[244],"mixture-based":[247],"minimizes":[250],"gap":[253],"Furthermore,":[257],"aligning":[259],"approximate":[264],"refined":[266],"posterior,":[268],"obtain":[270],"that":[273,293,314,339],"effectively":[274,318],"incorporate":[275],"while":[278],"inputs":[282],"at":[283],"time.":[285],"Experimental":[286],"results":[287],"on":[288],"two":[289],"benchmark":[290],"datasets":[291],"demonstrate":[292],"proposed":[295,341],"improves":[297],"performance":[299],"itself,":[303],"suggested":[304],"higher":[306],"linear":[307],"classification":[308],"cosine":[311],"similarity,":[312],"learned":[316],"distributions":[321],"other":[323],"indicated":[326],"lower":[328],"Fr\u00e9chet":[329],"Inception":[330],"Distance":[331],"(FID)":[332],"scores":[333],"cross-modal":[335],"generation.":[336],"This":[337],"indicates":[338],"approach":[342],"significantly":[343],"enhances":[344],"inferred":[346],"inputs.":[350]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
