{"id":"https://openalex.org/W7127881197","doi":"https://doi.org/10.48550/arxiv.2602.04144","title":"OMG-Agent: Toward Robust Missing Modality Generation with Decoupled Coarse-to-Fine Agentic Workflows","display_name":"OMG-Agent: Toward Robust Missing Modality Generation with Decoupled Coarse-to-Fine Agentic Workflows","publication_year":2026,"publication_date":"2026-02-04","ids":{"openalex":"https://openalex.org/W7127881197","doi":"https://doi.org/10.48550/arxiv.2602.04144"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.04144","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125194002","display_name":"Ruiting Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dai, Ruiting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125108736","display_name":"Zheyu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zheyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125104755","display_name":"Haoyu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Haoyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119247096","display_name":"Yihan Liu","orcid":"https://orcid.org/0009-0007-6721-0204"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yihan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125209027","display_name":"Chengzhi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chengzhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125228352","display_name":"Zekun Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zekun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125121160","display_name":"Zishan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zishan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125217129","display_name":"Jiaman Cen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cen, Jiaman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125177318","display_name":"Lisi Mo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mo, Lisi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5125194002"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.20389999449253082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.20389999449253082,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.15680000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.11800000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5680000185966492},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.51910001039505},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.44620001316070557},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4309999942779541},{"id":"https://openalex.org/keywords/executor","display_name":"Executor","score":0.39399999380111694},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3483000099658966},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.3393000066280365},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.33649998903274536},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.335999995470047},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.33489999175071716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7702999711036682},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5680000185966492},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.550599992275238},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.51910001039505},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.44620001316070557},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4309999942779541},{"id":"https://openalex.org/C180591056","wikidata":"https://www.wikidata.org/wiki/Q654437","display_name":"Executor","level":2,"score":0.39399999380111694},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3483000099658966},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.33649998903274536},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C2780704645","wikidata":"https://www.wikidata.org/wiki/Q9251458","display_name":"Observer (physics)","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32519999146461487},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.3073999881744385},{"id":"https://openalex.org/C2776544517","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Unexpected events","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3028999865055084},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2727999985218048},{"id":"https://openalex.org/C147494362","wikidata":"https://www.wikidata.org/wiki/Q2078905","display_name":"Troubleshooting","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26930001378059387},{"id":"https://openalex.org/C134752490","wikidata":"https://www.wikidata.org/wiki/Q374182","display_name":"Logical consequence","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.2612000107765198},{"id":"https://openalex.org/C88516994","wikidata":"https://www.wikidata.org/wiki/Q1268863","display_name":"Dynamic time warping","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C2983203078","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Information gain","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2549000084400177},{"id":"https://openalex.org/C121375916","wikidata":"https://www.wikidata.org/wiki/Q936559","display_name":"Principle of compositionality","level":2,"score":0.2542000114917755}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.04144","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.04144","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.04144","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.04144","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"incompleteness":[1],"severely":[2],"impedes":[3],"the":[4,72,92],"reliability":[5],"of":[6],"multimodal":[7],"systems.":[8],"Existing":[9],"reconstruction":[10],"methods":[11],"face":[12],"distinct":[13],"bottlenecks:":[14],"conventional":[15],"parametric/generative":[16],"models":[17],"are":[18,39],"prone":[19],"to":[20,23,77,142],"hallucinations":[21],"due":[22],"over-reliance":[24],"on":[25,151,170],"internal":[26],"memory,":[27],"while":[28],"retrieval-augmented":[29],"frameworks":[30],"struggle":[31],"with":[32],"retrieval":[33],"rigidity.":[34],"Critically,":[35],"these":[36],"end-to-end":[37],"architectures":[38],"fundamentally":[40],"constrained":[41],"by":[42],"Semantic-Detail":[43],"Entanglement":[44],"--":[45],"a":[46,67,78,85,112,118,131,167],"structural":[47],"conflict":[48],"between":[49],"logical":[50],"reasoning":[51],"and":[52,129,145],"signal":[53],"synthesis":[54],"that":[55,70,103,122,134,155],"compromises":[56],"fidelity.":[57],"In":[58],"this":[59],"paper,":[60],"we":[61],"present":[62],"\\textbf{\\underline{O}}mni-\\textbf{\\underline{M}}odality":[63],"\\textbf{\\underline{G}}eneration":[64],"Agent":[65],"(\\textbf{OMG-Agent}),":[66],"novel":[68],"framework":[69],"shifts":[71],"paradigm":[73],"from":[74],"static":[75],"mapping":[76],"dynamic":[79],"coarse-to-fine":[80],"Agentic":[81],"Workflow.":[82],"By":[83],"mimicking":[84],"\\textit{deliberate-then-act}":[86],"cognitive":[87],"process,":[88],"OMG-Agent":[89,156],"explicitly":[90],"decouples":[91],"task":[93],"into":[94],"three":[95],"synergistic":[96],"stages:":[97],"(1)":[98],"an":[99],"MLLM-driven":[100],"Semantic":[101],"Planner":[102],"resolves":[104],"input":[105],"ambiguity":[106],"via":[107],"Progressive":[108],"Contextual":[109],"Reasoning,":[110],"creating":[111],"deterministic":[113],"structured":[114],"semantic":[115],"plan;":[116],"(2)":[117],"non-parametric":[119],"Evidence":[120],"Retriever":[121],"grounds":[123],"abstract":[124],"semantics":[125],"in":[126],"external":[127],"knowledge;":[128],"(3)":[130],"Retrieval-Injected":[132],"Executor":[133],"utilizes":[135],"retrieved":[136],"evidence":[137],"as":[138],"flexible":[139],"feature":[140],"prompts":[141],"overcome":[143],"rigidity":[144],"synthesize":[146],"high-fidelity":[147],"details.":[148],"Extensive":[149],"experiments":[150],"multiple":[152],"benchmarks":[153],"demonstrate":[154],"consistently":[157],"surpasses":[158],"state-of-the-art":[159],"methods,":[160],"maintaining":[161],"robustness":[162],"under":[163],"extreme":[164],"missingness,":[165],"e.g.,":[166],"$2.6$-point":[168],"gain":[169],"CMU-MOSI":[171],"at":[172],"$70$\\%":[173],"missing":[174],"rates.":[175]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-07T00:00:00"}
