{"id":"https://openalex.org/W7154713166","doi":"https://doi.org/10.48550/arxiv.2604.14568","title":"Learning Adaptive Reasoning Paths for Efficient Visual Reasoning","display_name":"Learning Adaptive Reasoning Paths for Efficient Visual Reasoning","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154713166","doi":"https://doi.org/10.48550/arxiv.2604.14568"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14568","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133849283","display_name":"Yixu Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yixu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133844528","display_name":"Tinghui Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Tinghui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133898271","display_name":"Muhao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Muhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0007999999797903001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.7509999871253967},{"id":"https://openalex.org/keywords/analytic-reasoning","display_name":"Analytic reasoning","score":0.5964999794960022},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.5889000296592712},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.5626999735832214},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.4934999942779541},{"id":"https://openalex.org/keywords/qualitative-reasoning","display_name":"Qualitative reasoning","score":0.4867999851703644},{"id":"https://openalex.org/keywords/adaptive-reasoning","display_name":"Adaptive reasoning","score":0.4684000015258789},{"id":"https://openalex.org/keywords/deductive-reasoning","display_name":"Deductive reasoning","score":0.44909998774528503},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.44620001316070557},{"id":"https://openalex.org/keywords/model-based-reasoning","display_name":"Model-based reasoning","score":0.4316999912261963}],"concepts":[{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.7509999871253967},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7361000180244446},{"id":"https://openalex.org/C103057564","wikidata":"https://www.wikidata.org/wiki/Q4751139","display_name":"Analytic reasoning","level":3,"score":0.5964999794960022},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.5889000296592712},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.5626999735832214},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5595999956130981},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.4934999942779541},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.4867999851703644},{"id":"https://openalex.org/C107848011","wikidata":"https://www.wikidata.org/wiki/Q4680756","display_name":"Adaptive reasoning","level":4,"score":0.4684000015258789},{"id":"https://openalex.org/C97364631","wikidata":"https://www.wikidata.org/wiki/Q484284","display_name":"Deductive reasoning","level":2,"score":0.44909998774528503},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.44620001316070557},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.4316999912261963},{"id":"https://openalex.org/C36964233","wikidata":"https://www.wikidata.org/wiki/Q7920942","display_name":"Verbal reasoning","level":3,"score":0.4056999981403351},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.4032000005245209},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.40299999713897705},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.38359999656677246},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3635999858379364},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.357699990272522},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.33649998903274536},{"id":"https://openalex.org/C43971567","wikidata":"https://www.wikidata.org/wiki/Q3142865","display_name":"Logical reasoning","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2946999967098236},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.29330000281333923},{"id":"https://openalex.org/C183521366","wikidata":"https://www.wikidata.org/wiki/Q7256422","display_name":"Psychology of reasoning","level":4,"score":0.28850001096725464},{"id":"https://openalex.org/C2779321571","wikidata":"https://www.wikidata.org/wiki/Q7936605","display_name":"Visual learning","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C178278151","wikidata":"https://www.wikidata.org/wiki/Q7936607","display_name":"Visual memory","level":3,"score":0.266400009393692},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14568","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14568","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"reasoning":[1,9,27,51,62,67,118,150],"models":[2,82],"(VRMs)":[3],"have":[4],"recently":[5],"shown":[6],"strong":[7],"cross-modal":[8],"capabilities":[10],"by":[11,134],"integrating":[12],"visual":[13,41,44,61,66,72,149],"perception":[14],"with":[15,100],"language":[16],"reasoning.":[17],"However,":[18],"they":[19],"often":[20],"suffer":[21],"from":[22],"overthinking,":[23],"producing":[24],"unnecessarily":[25],"long":[26],"chains":[28],"for":[29],"any":[30],"tasks.":[31,143],"We":[32],"attribute":[33],"this":[34],"issue":[35],"to":[36,83,113],"\\textbf{Reasoning":[37],"Path":[38],"Redundancy}":[39],"in":[40,141,155],"reasoning:":[42],"many":[43],"questions":[45],"do":[46],"not":[47],"require":[48],"the":[49,111,115],"full":[50],"process.":[52],"To":[53],"address":[54],"this,":[55],"we":[56],"propose":[57],"\\textbf{AVR},":[58],"an":[59,102],"adaptive":[60,148],"framework":[63],"that":[64,109,129,147],"decomposes":[65],"into":[68],"three":[69,87],"cognitive":[70],"functions:":[71],"perception,":[73],"logical":[74],"reasoning,":[75],"and":[76,94,158],"answer":[77],"application.":[78],"It":[79],"further":[80],"enables":[81],"dynamically":[84],"choose":[85],"among":[86],"response":[88],"formats:":[89],"Full":[90],"Format,":[91,93],"Perception-Only":[92],"Direct":[95],"Answer.":[96],"AVR":[97,130],"is":[98],"trained":[99],"FS-GRPO,":[101],"adaptation":[103],"of":[104],"Group":[105],"Relative":[106],"Policy":[107],"Optimization":[108],"encourages":[110],"model":[112],"select":[114],"most":[116],"efficient":[117],"format":[119],"while":[120,136],"preserving":[121],"correctness.":[122],"Experiments":[123],"on":[124],"multiple":[125],"vision-language":[126],"benchmarks":[127],"show":[128],"reduces":[131],"token":[132],"usage":[133],"50--90\\%":[135],"maintaining":[137],"overall":[138],"accuracy,":[139],"especially":[140],"perception-intensive":[142],"These":[144],"results":[145],"demonstrate":[146],"can":[151],"effectively":[152],"mitigate":[153],"overthinking":[154],"VRMs.":[156],"Code":[157],"data":[159],"are":[160],"available":[161],"at:":[162],"https://github.com/RunRiotComeOn/AVR.":[163]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-18T00:00:00"}
