{"id":"https://openalex.org/W7151417205","doi":"https://doi.org/10.48550/arxiv.2604.04379","title":"Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning","display_name":"Reinforce to Learn, Elect to Reason: A Dual Paradigm for Video Reasoning","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W7151417205","doi":"https://doi.org/10.48550/arxiv.2604.04379"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.04379","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04379","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.04379","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007698575","display_name":"Songyuan Yang","orcid":"https://orcid.org/0009-0009-9291-6369"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Songyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133102251","display_name":"Weijiang Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Weijiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133114662","display_name":"Jilin Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Jilin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133118019","display_name":"Ziyu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010970833","display_name":"Guijian Tang","orcid":"https://orcid.org/0000-0003-4022-1142"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Guijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133140995","display_name":"Wenjing Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Wenjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004941473","display_name":"Huibin Tan","orcid":"https://orcid.org/0000-0003-4060-8793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tan, Huibin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133117077","display_name":"Nong Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Nong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5007698575"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8738999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8738999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.07339999824762344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.010499999858438969,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8618999719619751},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6678000092506409},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5171999931335449},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4862000048160553},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4122999906539917},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.40619999170303345},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.40450000762939453}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8618999719619751},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7529000043869019},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6678000092506409},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.571399986743927},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5171999931335449},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4862000048160553},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.453900009393692},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.40619999170303345},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.40450000762939453},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.36500000953674316},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.3337000012397766},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3127000033855438},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2660999894142151}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.04379","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04379","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.04379","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04379","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4852675795555115,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Video":[0],"reasoning":[1,25,70,81,101],"has":[2],"advanced":[3],"with":[4,57],"large":[5],"multimodal":[6],"models":[7],"(LMMs),":[8],"yet":[9],"their":[10,119],"inference":[11,222],"is":[12,26,223],"often":[13],"a":[14,37,48,107,112,136,199,209,224],"single":[15],"pass":[16],"that":[17,40,110],"returns":[18],"an":[19,182],"answer":[20],"without":[21,153],"verifying":[22],"whether":[23],"the":[24,55,92,142,155,175],"evidence-aligned.":[27],"We":[28,157],"introduce":[29],"Reinforce":[30],"to":[31,34,43,94,227],"Learn,":[32],"Elect":[33],"Reason":[35],"(RLER),":[36],"dual":[38],"paradigm":[39],"decouples":[41],"learning":[42,60,216],"produce":[44],"evidence":[45,98,127,213,220],"from":[46],"obtaining":[47],"reliable":[49],"answer.":[50],"In":[51,103],"RLER-Training,":[52],"we":[53,105],"optimize":[54],"policy":[56],"group-relative":[58],"reinforcement":[59],"(RL)":[61],"and":[62,79,83,99,121,131,133,146,151,164,180,204,217],"3":[63],"novel":[64],"task-driven":[65],"rewards:":[66],"Frame-sensitive":[67],"reward":[68,76,85],"grounds":[69],"on":[71,167,192],"explicit":[72,214],"key":[73],"frames,":[74,123],"Think-transparency":[75],"shapes":[77],"readable":[78],"parsable":[80],"traces,":[82],"Anti-repetition":[84],"boosts":[86],"information":[87],"density.":[88],"These":[89],"signals":[90],"teach":[91],"model":[93],"emit":[95],"structured,":[96],"machine-checkable":[97],"potentiate":[100],"capabilities.":[102],"RLER-Inference,":[104],"apply":[106],"train-free":[108],"orchestrator":[109],"generates":[111],"small":[113],"set":[114],"of":[115,174,185],"diverse":[116],"candidates,":[117],"parses":[118],"answers":[120],"cited":[122],"scores":[124],"them":[125],"by":[126,219],"consistency,":[128],"confidence,":[129],"transparency,":[130],"non-redundancy,":[132],"then":[134],"performs":[135],"robust":[137,225],"evidence-weighted":[138],"election.":[139],"This":[140],"closes":[141],"loop":[143],"between":[144,202],"producing":[145],"using":[147,191],"evidence,":[148],"improving":[149],"reliability":[150],"interpretability":[152],"enlarging":[154],"model.":[156],"comprehensively":[158],"evaluate":[159],"RLER":[160,171],"against":[161],"various":[162],"open-source":[163],"RL-based":[165],"LMMs":[166],"8":[168],"representative":[169],"benchmarks.":[170],"achieves":[172],"state":[173],"art":[176],"across":[177],"all":[178],"benchmarks":[179],"delivers":[181],"average":[183,193],"improvement":[184],"6.3\\%":[186],"over":[187],"base":[188],"models,":[189],"while":[190],"3.1":[194],"candidates":[195],"per":[196],"question,":[197],"indicating":[198],"favorable":[200],"balance":[201],"compute":[203],"quality.":[205],"The":[206],"results":[207],"support":[208],"simple":[210],"thesis:":[211],"making":[212],"during":[215,221],"electing":[218],"path":[226],"trustworthy":[228],"video":[229],"reasoning.":[230]},"counts_by_year":[],"updated_date":"2026-04-08T06:07:18.267832","created_date":"2026-04-08T00:00:00"}
