{"id":"https://openalex.org/W7140384334","doi":"https://doi.org/10.48550/arxiv.2603.24093","title":"Towards Effective Experiential Learning: Dual Guidance for Utilization and Internalization","display_name":"Towards Effective Experiential Learning: Dual Guidance for Utilization and Internalization","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7140384334","doi":"https://doi.org/10.48550/arxiv.2603.24093"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.24093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.24093","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120402710","display_name":"Fei Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bai, Fei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130696616","display_name":"Zhipeng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130645755","display_name":"Chuan Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Chuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130689580","display_name":"Ming yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Ming","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130670589","display_name":"Ran Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tao, Ran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130689265","display_name":"Bryan Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Bryan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130707946","display_name":"Wayne Xin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Wayne Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130690581","display_name":"Jian Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130672218","display_name":"Hongteng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Hongteng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5120402710"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14589999616146088,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14589999616146088,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.12620000541210175,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11969999969005585,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7495999932289124},{"id":"https://openalex.org/keywords/experiential-learning","display_name":"Experiential learning","score":0.7103000283241272},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6510999798774719},{"id":"https://openalex.org/keywords/internalization","display_name":"Internalization","score":0.5834000110626221},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.5159000158309937},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.31610000133514404}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7495999932289124},{"id":"https://openalex.org/C37228920","wikidata":"https://www.wikidata.org/wiki/Q1307600","display_name":"Experiential learning","level":2,"score":0.7103000283241272},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6510999798774719},{"id":"https://openalex.org/C139770010","wikidata":"https://www.wikidata.org/wiki/Q1339807","display_name":"Internalization","level":3,"score":0.5834000110626221},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5206000208854675},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.5159000158309937},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.4771000146865845},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.4675000011920929},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.45249998569488525},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3785000145435333},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3433000147342682},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.3407000005245209},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.33959999680519104},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.31610000133514404},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.3102000057697296},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C2983223341","wikidata":"https://www.wikidata.org/wiki/Q1361181","display_name":"Role playing","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C2776943663","wikidata":"https://www.wikidata.org/wiki/Q165687","display_name":"Human capital","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.24093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.24093","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24093","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recently,":[0],"reinforcement":[1,18],"learning~(RL)":[2],"has":[3,23],"become":[4],"an":[5,107],"important":[6],"approach":[7],"for":[8,29],"improving":[9],"the":[10,120,124,128,140],"capabilities":[11],"of":[12,123,151,170],"large":[13],"language":[14],"models~(LLMs).":[15],"In":[16],"particular,":[17],"learning":[19],"from":[20,110],"verifiable":[21],"rewards~(RLVR)":[22],"emerged":[24],"as":[25],"a":[26,39,90,148],"promising":[27],"paradigm":[28],"reasoning":[30],"tasks.":[31],"However,":[32],"existing":[33],"RL-based":[34],"training":[35,101],"still":[36],"remains":[37],"only":[38],"rough":[40],"approximation":[41],"to":[42,53,99,138,173],"human":[43],"learning.":[44],"Human":[45],"learners":[46],"leverage":[47],"both":[48],"external":[49],"and":[50,56,75,96,127,143,154,168],"internal":[51,130],"experience":[52,77,108,125,141,152,171],"guide":[54],"exploration":[55,118],"gradually":[57],"internalize":[58,76],"useful":[59],"trajectories":[60,134],"into":[61],"stable":[62],"knowledge.":[63,131],"Motivated":[64],"by":[65],"this":[66,83],"gap,":[67],"we":[68,85],"ask:":[69],"how":[70],"can":[71],"LLMs":[72],"better":[73,166],"utilize":[74],"during":[78],"RLVR":[79],"training?":[80],"To":[81],"answer":[82],"question,":[84],"propose":[86],"\\textbf{D}ual":[87],"\\textbf{G}uidance":[88],"\\textbf{O}ptimization~(\\textbf{DGO}),":[89],"unified":[91],"framework":[92],"that":[93,158,165],"leverages":[94],"\\emph{external}":[95],"\\emph{internal":[97],"experience}":[98],"improve":[100],"effectiveness.":[102],"Specifically,":[103],"DGO":[104,159],"first":[105],"constructs":[106],"bank":[109,126,142],"previously":[111],"explored":[112],"trajectories.":[113],"The":[114,132],"policy":[115],"then":[116],"performs":[117],"under":[119],"joint":[121],"guidance":[122],"model's":[129],"resulting":[133],"are":[135],"further":[136],"used":[137],"refine":[139],"optimize":[144],"model":[145],"parameters,":[146],"forming":[147],"closed":[149],"loop":[150],"utilization":[153,167],"internalization.":[155],"Experiments":[156],"show":[157],"consistently":[160],"outperforms":[161],"baseline":[162],"methods,":[163],"suggesting":[164],"internalization":[169],"lead":[172],"more":[174],"effective":[175],"reasoning.":[176]},"counts_by_year":[],"updated_date":"2026-03-27T06:05:27.210665","created_date":"2026-03-27T00:00:00"}
