{"id":"https://openalex.org/W7138303899","doi":"https://doi.org/10.1609/aaai.v40i28.39484","title":"State Proficiency-Based Adaptive Fine-Tuning for Offline-to-Online Reinforcement Learning","display_name":"State Proficiency-Based Adaptive Fine-Tuning for Offline-to-Online Reinforcement Learning","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138303899","doi":"https://doi.org/10.1609/aaai.v40i28.39484"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v40i28.39484","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i28.39484","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i28.39484","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129672286","display_name":"Songlin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Songlin Li","raw_affiliation_strings":["Jilin University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129737975","display_name":"Wei Xiao","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Xiao","raw_affiliation_strings":["Jilin University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129664033","display_name":"Hao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Wu","raw_affiliation_strings":["Jilin University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129737452","display_name":"Xiaodan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodan Zhang","raw_affiliation_strings":["Jilin University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076347333","display_name":"Daolong An","orcid":"https://orcid.org/0009-0005-0020-1990"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daolong An","raw_affiliation_strings":["Jilin University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129736076","display_name":"Shuai Lu","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210136497","display_name":"Jilin Medical University","ror":"https://ror.org/03mzw7781","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210136497"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai L\u00fc","raw_affiliation_strings":["Jilin University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jilin University","institution_ids":["https://openalex.org/I4210136497","https://openalex.org/I194450716"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5129672286"],"corresponding_institution_ids":["https://openalex.org/I194450716","https://openalex.org/I4210136497"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.44745602,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"28","first_page":"23169","last_page":"23176"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8356000185012817,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.8356000185012817,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.039500001817941666,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.007300000172108412,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8215000033378601},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6150000095367432},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5860999822616577},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5306000113487244},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4767000079154968},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.40939998626708984},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.3637999892234802}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8215000033378601},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6680999994277954},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6150000095367432},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5860999822616577},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5306000113487244},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4767000079154968},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4634999930858612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4449000060558319},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.40939998626708984},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.3637999892234802},{"id":"https://openalex.org/C125014702","wikidata":"https://www.wikidata.org/wiki/Q4680749","display_name":"Adaptive learning","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.28940001130104065},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C52970973","wikidata":"https://www.wikidata.org/wiki/Q2497134","display_name":"Adaptive system","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.26179999113082886},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1609/aaai.v40i28.39484","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i28.39484","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:ojs.aaai.org:article/39484","is_oa":false,"landing_page_url":"https://ojs.aaai.org/index.php/AAAI/article/view/39484","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2159-5399","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i28.39484","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i28.39484","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"offline-to-online":[1],"(O2O)":[2],"reinforcement":[3],"learning,":[4],"achieving":[5],"efficient":[6],"performance":[7,147],"improvement":[8,30,116],"while":[9,121],"maintaining":[10],"training":[11],"stability":[12],"remains":[13],"a":[14,80,90],"critical":[15],"challenge":[16],"for":[17],"effective":[18,77,93,105],"fine-tuning.":[19,36,106],"Existing":[20],"O2O":[21],"methods":[22],"usually":[23],"focus":[24],"on":[25,110,148],"the":[26,54,67,74,149],"balance":[27],"between":[28],"policy":[29,32,57,101,115,127],"and":[31],"constraint":[33,128],"during":[34,114,126],"online":[35],"However,":[37],"they":[38],"often":[39],"overlook":[40],"sample":[41,98,119],"differences,":[42],"leading":[43],"to":[44,72,103,117,129],"suboptimal":[45],"performance.":[46],"To":[47],"address":[48],"this":[49],"challenge,":[50],"we":[51,65],"identify":[52],"that":[53,95,137],"effectiveness":[55],"of":[56,69,76],"learning":[58,78],"exhibits":[59],"significant":[60,140],"variation":[61],"across":[62],"states.":[63],"Therefore,":[64],"propose":[66,84],"notion":[68],"state":[70],"proficiency":[71,112,124],"capture":[73],"degree":[75],"in":[79,100],"given":[81],"state.":[82],"We":[83],"State":[85],"Proficiency-Based":[86],"Adaptive":[87],"Fine-Tuning":[88],"(SPA),":[89],"straightforward":[91],"yet":[92],"method":[94],"establishes":[96],"proficiency-based":[97],"priorities":[99],"optimization":[102],"facilitate":[104],"Specifically,":[107],"SPA":[108,138],"focuses":[109],"low":[111],"samples":[113,125],"enhance":[118],"efficiency,":[120],"emphasizing":[122],"high":[123],"ensure":[130],"stable":[131],"training.":[132],"Extensive":[133],"empirical":[134],"results":[135],"demonstrate":[136],"achieves":[139],"improvements":[141],"over":[142],"existing":[143],"methods,":[144],"attaining":[145],"state-of-the-art":[146],"D4RL":[150],"benchmark.":[151]},"counts_by_year":[],"updated_date":"2026-06-08T08:47:23.578185","created_date":"2026-03-18T00:00:00"}
