{"id":"https://openalex.org/W7127431455","doi":"https://doi.org/10.48550/arxiv.2602.00759","title":"Adaptive Ability Decomposing for Unlocking Large Reasoning Model Effective Reinforcement Learning","display_name":"Adaptive Ability Decomposing for Unlocking Large Reasoning Model Effective Reinforcement Learning","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7127431455","doi":"https://doi.org/10.48550/arxiv.2602.00759"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.00759","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124940143","display_name":"Zhipeng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Zhipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124907424","display_name":"Xiaobo Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Xiaobo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124962888","display_name":"Wayne Xin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Wayne Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041992434","display_name":"Youbin Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Youbin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124958965","display_name":"Ji-Rong Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Ji-Rong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5124940143"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.49869999289512634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.49869999289512634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.08269999921321869,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08129999786615372,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decomposer","display_name":"Decomposer","score":0.7738000154495239},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6875},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6438000202178955},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6370000243186951},{"id":"https://openalex.org/keywords/semantic-reasoner","display_name":"Semantic reasoner","score":0.4893999993801117},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.4465999901294708},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.38420000672340393}],"concepts":[{"id":"https://openalex.org/C104170005","wikidata":"https://www.wikidata.org/wiki/Q842391","display_name":"Decomposer","level":3,"score":0.7738000154495239},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7408000230789185},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6875},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6438000202178955},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6370000243186951},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6208999752998352},{"id":"https://openalex.org/C9616225","wikidata":"https://www.wikidata.org/wiki/Q3929429","display_name":"Semantic reasoner","level":2,"score":0.4893999993801117},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48559999465942383},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.4465999901294708},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3386000096797943},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C171560490","wikidata":"https://www.wikidata.org/wiki/Q3374857","display_name":"Convergent thinking","level":4,"score":0.27900001406669617},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.00759","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.00759","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.00759","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.00759","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"with":[2,122,134],"verifiable":[3],"rewards":[4],"(RLVR)":[5],"has":[6],"shown":[7],"great":[8],"potential":[9],"to":[10,22,90,105,155],"enhance":[11],"the":[12,23,30,33,55,74,112,118,165,169,187],"reasoning":[13],"ability":[14],"of":[15,26,76,97,164,180],"large":[16],"language":[17],"models":[18],"(LLMs).":[19],"However,":[20],"due":[21],"limited":[24],"amount":[25],"information":[27,53],"provided":[28],"during":[29],"RLVR":[31,56,85,121,157,170],"process,":[32],"model":[34],"can":[35,152],"only":[36],"engage":[37],"in":[38,45,111],"largely":[39],"blind":[40],"exploration,":[41],"which":[42,178],"often":[43],"results":[44],"failure":[46],"on":[47,60],"challenging":[48],"problems.":[49],"To":[50,125],"provide":[51],"additional":[52],"for":[54,72,108,185],"process":[57,171],"without":[58,86],"relying":[59],"a":[61,82,95,148],"teacher":[62],"model,":[63],"we":[64,79,101,129,141,160],"propose":[65],"A$^2$D,":[66,128],"an":[67,162],"Adaptive":[68],"Ability":[69],"Decomposing":[70],"method":[71,145],"enhancing":[73,186],"effectiveness":[75],"RLVR.":[77],"Specifically,":[78],"first":[80,130],"train":[81,117],"decomposer":[83,104],"via":[84],"distillation,":[87],"enabling":[88],"it":[89],"decompose":[91],"complex":[92],"questions":[93],"into":[94],"set":[96],"simpler":[98],"sub-questions.":[99],"Next,":[100,140],"use":[102],"this":[103],"annotate":[106],"sub-questions":[107],"each":[109],"question":[110],"training":[113],"dataset,":[114],"and":[115,175,177,190],"then":[116],"reasoner":[119],"under":[120],"sub-question":[123],"guidance.":[124],"better":[126,183],"understand":[127],"compare":[131],"its":[132,138,173],"performance":[133,174],"competitive":[135],"baselines,":[136],"showing":[137],"effectiveness.":[139],"observe":[142],"that":[143,151],"our":[144],"functions":[146],"as":[147],"plug-and-play":[149],"module":[150],"be":[153],"applied":[154],"different":[156],"algorithms.":[158],"Furthermore,":[159],"conduct":[161],"analysis":[163],"decomposer,":[166],"revealing":[167],"how":[168],"affects":[172],"behavior,":[176],"type":[179],"guidance":[181],"is":[182],"suited":[184],"reasoner's":[188],"exploration":[189],"exploitation":[191],"abilities.":[192]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-04T00:00:00"}
