{"id":"https://openalex.org/W7161038702","doi":"https://doi.org/10.48550/arxiv.2605.11400","title":"UniPath: Adaptive Coordination of Understanding and Generation for Unified Multimodal Reasoning","display_name":"UniPath: Adaptive Coordination of Understanding and Generation for Unified Multimodal Reasoning","publication_year":2026,"publication_date":"2026-05-12","ids":{"openalex":"https://openalex.org/W7161038702","doi":"https://doi.org/10.48550/arxiv.2605.11400"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.11400","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11400","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.11400","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136058810","display_name":"Hayes Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bai, Hayes","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136023968","display_name":"Yinyi Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Yinyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136058894","display_name":"Wenwen Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Wenwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136026783","display_name":"Qingsong Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Qingsong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136032116","display_name":"Jindong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jindong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5136058810"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.77920001745224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.77920001745224,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.05590000003576279,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.02889999933540821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6089000105857849},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5626999735832214},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5508000254631042},{"id":"https://openalex.org/keywords/executor","display_name":"Executor","score":0.5368000268936157},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.46540001034736633},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4417000114917755},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.37400001287460327},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.36000001430511475},{"id":"https://openalex.org/keywords/mode","display_name":"Mode (computer interface)","score":0.3490999937057495}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8090999722480774},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6089000105857849},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5626999735832214},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5508000254631042},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5400000214576721},{"id":"https://openalex.org/C180591056","wikidata":"https://www.wikidata.org/wiki/Q654437","display_name":"Executor","level":2,"score":0.5368000268936157},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.46540001034736633},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4417000114917755},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41350001096725464},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.37400001287460327},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.3490999937057495},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3208000063896179},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3018999993801117},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C84653758","wikidata":"https://www.wikidata.org/wiki/Q5575175","display_name":"Goal orientation","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C107257861","wikidata":"https://www.wikidata.org/wiki/Q656316","display_name":"Coordination game","level":2,"score":0.2736000120639801},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C2778701210","wikidata":"https://www.wikidata.org/wiki/Q28130034","display_name":"Constructive","level":3,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.11400","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11400","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.11400","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.11400","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Unified":[0],"multimodal":[1,58],"models":[2],"(UMMs)":[3],"aim":[4],"to":[5,19,78,116,128,139],"integrate":[6],"understanding":[7],"and":[8,28,89,107,121,133],"generation":[9],"within":[10],"a":[11,45,84,96,110,130,135],"single":[12,97],"architecture.":[13],"However,":[14],"it":[15],"remains":[16],"underexplored":[17],"how":[18],"effectively":[20],"coordinate":[21],"these":[22],"two":[23],"capabilities":[24],"for":[25,49,86],"more":[26],"effective":[27],"efficient":[29],"reasoning.":[30],"Existing":[31],"coordination":[32,47,68,98,154],"approaches":[33],"either":[34],"perform":[35],"coupling":[36],"during":[37],"training,":[38],"without":[39],"explicit":[40],"inference-time":[41],"coordination,":[42],"or":[43],"impose":[44],"fixed":[46,153],"pattern":[48],"all":[50],"inputs.":[51],"In":[52],"this":[53],"work,":[54],"we":[55,100],"show":[56,145],"that":[57,72,146],"tasks":[59],"exhibit":[60],"substantial":[61],"coordination-path":[62,91,148],"diversity:":[63],"different":[64,67],"inputs":[65],"favor":[66],"paths.":[69],"This":[70],"suggests":[71],"exploiting":[73,90],"such":[74],"diversity":[75,149],"is":[76,163],"key":[77],"improving":[79],"performance.":[80],"We":[81,124],"propose":[82],"UniPath,":[83],"framework":[85],"adaptively":[87],"modeling":[88],"diversity.":[92],"Instead":[93],"of":[94,109],"enforcing":[95],"pattern,":[99],"represent":[101],"task":[102],"solving":[103],"as":[104],"the":[105],"selection":[106],"execution":[108],"path,":[111],"ranging":[112],"from":[113],"direct":[114],"answering":[115],"textual":[117],"inference,":[118],"visual-thought":[119],"construction,":[120],"hypothesis-based":[122],"exploration.":[123],"construct":[125],"role-aligned":[126],"trajectories":[127],"train":[129],"path-conditioned":[131],"executor":[132],"introduce":[134],"lightweight":[136],"planner":[137],"mechanism":[138],"enable":[140],"input-dependent":[141],"path":[142],"selection.":[143],"Experiments":[144],"leveraging":[147],"improves":[150],"performance":[151],"over":[152],"strategies":[155],"while":[156],"providing":[157],"interpretable":[158],"intermediate":[159],"behaviors.":[160],"The":[161],"code":[162],"available":[164],"at:https://github.com/AIFrontierLab/TorchUMM/tree/main/src/umm/post_training/unipath.":[165]},"counts_by_year":[],"updated_date":"2026-05-14T06:16:12.342656","created_date":"2026-05-14T00:00:00"}
