{"id":"https://openalex.org/W4417051737","doi":"https://doi.org/10.1109/iccv51701.2025.00461","title":"Boosting MLLM Reasoning with Text-Debiased Hint-GRPO","display_name":"Boosting MLLM Reasoning with Text-Debiased Hint-GRPO","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4417051737","doi":"https://doi.org/10.1109/iccv51701.2025.00461"},"language":"en","primary_location":{"id":"doi:10.1109/iccv51701.2025.00461","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00461","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.23905","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089939846","display_name":"Qihan Huang","orcid":"https://orcid.org/0000-0002-2529-467X"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qihan Huang","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102266633","display_name":"Weilong Dai","orcid":"https://orcid.org/0009-0006-9288-932X"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weilong Dai","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100339168","display_name":"Jinlong Liu","orcid":"https://orcid.org/0000-0002-0284-5029"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinlong Liu","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008173176","display_name":"Wanggui He","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wanggui He","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112985048","display_name":"Hao Jiang","orcid":"https://orcid.org/0000-0003-1198-8643"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hao Jiang","raw_affiliation_strings":["Alibaba Group"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026532752","display_name":"Mingli Song","orcid":"https://orcid.org/0000-0003-2621-6048"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingli Song","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100770067","display_name":"Jingyuan Chen","orcid":"https://orcid.org/0000-0002-6039-0763"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyuan Chen","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056066639","display_name":"Chang Yao","orcid":"https://orcid.org/0000-0002-1187-6257"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chang Yao","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047371218","display_name":"Jie Song","orcid":"https://orcid.org/0000-0003-3671-6521"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Song","raw_affiliation_strings":["Zhejiang University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3443908,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4848","last_page":"4857"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5706999897956848,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5706999897956848,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.11869999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07199999690055847,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6355999708175659},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5426999926567078},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.44339999556541443},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.4422999918460846},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.41499999165534973},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.382099986076355},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.37070000171661377},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.34470000863075256},{"id":"https://openalex.org/keywords/opportunistic-reasoning","display_name":"Opportunistic reasoning","score":0.3393000066280365}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7483999729156494},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6355999708175659},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6230999827384949},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5426999926567078},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4700999855995178},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.44339999556541443},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.4422999918460846},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.41499999165534973},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.37070000171661377},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.34470000863075256},{"id":"https://openalex.org/C86827895","wikidata":"https://www.wikidata.org/wiki/Q7098582","display_name":"Opportunistic reasoning","level":4,"score":0.3393000066280365},{"id":"https://openalex.org/C37335422","wikidata":"https://www.wikidata.org/wiki/Q6888134","display_name":"Model-based reasoning","level":3,"score":0.3174999952316284},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3021000027656555},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C2780735816","wikidata":"https://www.wikidata.org/wiki/Q28324931","display_name":"Incremental learning","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.26899999380111694},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iccv51701.2025.00461","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00461","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.23905","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.23905","pdf_url":"https://arxiv.org/pdf/2503.23905","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2503.23905","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2503.23905","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.23905","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2503.23905","pdf_url":"https://arxiv.org/pdf/2503.23905","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"MLLM":[0,107,118,191,201],"reasoning":[1,12,23,67,187,202],"has":[2,34],"drawn":[3],"widespread":[4],"research":[5],"for":[6,128,149],"its":[7],"excellent":[8],"problem-solving":[9],"capability.":[10],"Current":[11],"methods":[13,184],"fall":[14],"into":[15],"two":[16,77],"types:":[17],"PRM,":[18],"which":[19,27,43],"supervises":[20,28],"the":[21,29,36,81,86,106,117,162,186],"intermediate":[22],"steps,":[24],"and":[25,64,91,111,122,154],"ORM,":[26,42],"final":[30],"results.":[31],"Recently,":[32],"DeepSeek-R1":[33],"challenged":[35],"traditional":[37],"view":[38],"that":[39,79,98,116,141,157,181],"PRM":[40],"outperforms":[41],"demonstrates":[44],"strong":[45],"generalization":[46],"performance":[47,82,198],"using":[48],"an":[49],"ORM":[50],"method":[51],"(i.e.,":[52],"GRPO).":[53],"However,":[54],"current":[55],"MLLM's":[56],"GRPO":[57,84,99,131],"algorithms":[58],"still":[59],"struggle":[60],"to":[61,97,104,199],"handle":[62],"challenging":[63],"complex":[65],"multimodal":[66],"tasks":[68],"(e.g.,":[69],"mathematical":[70],"reasoning).":[71],"In":[72],"this":[73,137],"work,":[74],"we":[75],"reveal":[76],"problems":[78],"impede":[80],"of":[83,151,189],"on":[85,108,125,173],"MLLM:":[87],"Low":[88,93],"data":[89,94,143],"utilization":[90,95,144],"Text-bias.":[92],"refers":[96],"cannot":[100],"acquire":[101],"positive":[102],"rewards":[103],"update":[105],"difficult":[109],"samples,":[110],"text-bias":[112,155,159],"is":[113,206],"a":[114,193],"phenomenon":[115],"bypasses":[119],"image":[120,167],"condition":[121,127,168],"solely":[123],"relies":[124],"text":[126],"generation":[129],"after":[130],"training.":[132],"To":[133],"tackle":[134],"these":[135],"problems,":[136],"work":[138],"proposes":[139],"Hint-GRPO":[140],"improves":[142],"by":[145,160,192],"adaptively":[146],"providing":[147],"hints":[148],"samples":[150],"varying":[152],"difficulty,":[153],"calibration":[156],"mitigates":[158],"calibrating":[161],"token":[163],"prediction":[164],"logits":[165],"with":[166],"in":[169],"test-time.":[170],"Experiment":[171],"results":[172],"three":[174],"base":[175],"MLLMs":[176],"across":[177],"eleven":[178],"datasets":[179],"demonstrate":[180],"our":[182],"proposed":[183],"advance":[185],"capability":[188],"original":[190],"large":[194],"margin,":[195],"exhibiting":[196],"superior":[197],"existing":[200],"methods.":[203],"Our":[204],"code":[205],"available":[207],"at":[208],"https://github.com/hqhQAQ/Hint-GRPO.":[209]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
