{"id":"https://openalex.org/W7133329048","doi":"https://doi.org/10.48550/arxiv.2603.01106","title":"DIVA-GRPO: Enhancing Multimodal Reasoning through Difficulty-Adaptive Variant Advantage","display_name":"DIVA-GRPO: Enhancing Multimodal Reasoning through Difficulty-Adaptive Variant Advantage","publication_year":2026,"publication_date":"2026-03-01","ids":{"openalex":"https://openalex.org/W7133329048","doi":"https://doi.org/10.48550/arxiv.2603.01106"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01106","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01106","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01106","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128010828","display_name":"Haowen Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gao, Haowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127883299","display_name":"Zhenyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhenyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127966239","display_name":"Liang Pang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pang, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054549656","display_name":"Fangda Guo","orcid":"https://orcid.org/0000-0003-2401-6499"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Fangda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018581247","display_name":"Hongjian Dou Hongjian Dou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dou, Hongjian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127961887","display_name":"Guannan Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Guannan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127995390","display_name":"Shaoguo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shaoguo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127929319","display_name":"Tingting Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Tingting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128005762","display_name":"Huawei Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Huawei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127932525","display_name":"Xueqi Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Xueqi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5128010828"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.3594000041484833,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.3594000041484833,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10040000081062317,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09830000251531601,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5640000104904175},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5206999778747559},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.35510000586509705},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.2937000095844269},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.2849999964237213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7038999795913696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.614799976348877},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5640000104904175},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5206999778747559},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4970000088214874},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01106","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01106","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01106","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01106","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"with":[3,110],"group":[4],"relative":[5],"policy":[6],"optimization":[7,81],"(GRPO)":[8],"has":[9],"become":[10],"a":[11,32,89,100],"widely":[12],"adopted":[13],"approach":[14],"for":[15,52],"enhancing":[16],"the":[17],"reasoning":[18,30,142,154],"capabilities":[19],"of":[20],"multimodal":[21],"large":[22],"language":[23],"models":[24],"(MLLMs).":[25],"While":[26],"GRPO":[27],"enables":[28],"long-chain":[29],"without":[31],"critic,":[33],"it":[34],"often":[35,68],"suffers":[36],"from":[37,99],"sparse":[38],"rewards":[39,48],"on":[40,140],"difficult":[41],"problems":[42],"and":[43,64,114,119,124,131,153],"advantage":[44,92,132],"vanishing":[45,133],"when":[46],"group-level":[47],"are":[49],"too":[50],"consistent":[51],"overly":[53],"easy":[54],"or":[55],"hard":[56],"problems.":[57],"Existing":[58],"solutions":[59],"(sample":[60],"expansion,":[61],"selective":[62],"utilization,":[63],"indirect":[65],"reward":[66,76,129],"design)":[67],"fail":[69],"to":[70,78],"maintain":[71],"enough":[72],"variance":[73],"in":[74,150],"within-group":[75],"distributions":[77,98],"yield":[79],"clear":[80],"signals.":[82],"To":[83],"address":[84],"this,":[85],"we":[86],"propose":[87],"DIVA-GRPO,":[88],"difficulty-adaptive":[90],"variant":[91,96],"method":[93],"that":[94,145],"adjusts":[95],"difficulty":[97,112],"global":[101,120],"perspective.":[102],"DIVA-GRPO":[103,146],"dynamically":[104],"assesses":[105],"problem":[106],"difficulty,":[107],"samples":[108],"variants":[109],"appropriate":[111],"levels,":[113],"calculates":[115],"advantages":[116],"across":[117],"local":[118],"groups":[121],"using":[122],"difficulty-weighted":[123],"normalized":[125],"scaling.":[126],"This":[127],"alleviates":[128],"sparsity":[130],"while":[134],"improving":[135],"training":[136,151],"stability.":[137],"Extensive":[138],"experiments":[139],"six":[141],"benchmarks":[143],"demonstrate":[144],"outperforms":[147],"existing":[148],"approaches":[149],"efficiency":[152],"performance.":[155],"Code:":[156],"https://github.com/Siaaaaaa1/DIVA-GRPO":[157]},"counts_by_year":[],"updated_date":"2026-03-04T07:09:34.246503","created_date":"2026-03-04T00:00:00"}
