{"id":"https://openalex.org/W7135200690","doi":"https://doi.org/10.48550/arxiv.2603.11901","title":"FlexRec: Adapting LLM-based Recommenders for Flexible Needs via Reinforcement Learning","display_name":"FlexRec: Adapting LLM-based Recommenders for Flexible Needs via Reinforcement Learning","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135200690","doi":"https://doi.org/10.48550/arxiv.2603.11901"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11901","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11901","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11901","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128992084","display_name":"Yijun Pan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pan, Yijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046486099","display_name":"Weikang Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Weikang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129011848","display_name":"Qiyao Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Qiyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128981855","display_name":"Mingxuan Ju","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ju, Mingxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129042733","display_name":"Tong Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Tong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129083170","display_name":"Neil Shah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shah, Neil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129054515","display_name":"Rex Ying","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ying, Rex","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5128992084"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9175000190734863,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9175000190734863,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.012400000356137753,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.005100000184029341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7282000184059143},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.6908000111579895},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.670799970626831},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6313999891281128},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.6039000153541565},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5956000089645386},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5789999961853027}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7484999895095825},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7282000184059143},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.6908000111579895},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.670799970626831},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6313999891281128},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.6039000153541565},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5956000089645386},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5789999961853027},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5679000020027161},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.531499981880188},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.37070000171661377},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.33550000190734863},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C21569690","wikidata":"https://www.wikidata.org/wiki/Q94702","display_name":"Collaborative filtering","level":3,"score":0.27309998869895935},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.2728999853134155},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2572000026702881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11901","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11901","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11901","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11901","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modern":[0],"recommender":[1],"systems":[2],"must":[3],"adapt":[4],"to":[5,26,52,87,103,119,165,186,192,201],"dynamic,":[6],"need-specific":[7,195],"objectives":[8],"for":[9,19,49],"diverse":[10,172],"recommendation":[11,54,173],"scenarios,":[12],"yet":[13],"most":[14],"traditional":[15,210],"recommenders":[16,211],"are":[17],"optimized":[18],"a":[20,46,68,71,127,137],"single":[21],"static":[22],"target":[23],"and":[24,40,79,108,114,121,151,161,175,188,197,212],"struggle":[25],"reconfigure":[27],"behavior":[28],"on":[29,76,143],"demand.":[30],"Recent":[31],"advances":[32],"in":[33,43,194],"reinforcement-learning-based":[34],"post-training":[35,128],"have":[36],"unlocked":[37],"strong":[38,209],"instruction-following":[39],"reasoning":[41],"capabilities":[42],"LLMs,":[44],"suggesting":[45],"principled":[47],"route":[48],"aligning":[50],"them":[51],"complex":[53],"goals.":[55],"Motivated":[56],"by":[57,184,190],"this,":[58],"we":[59],"study":[60],"closed-set":[61],"autoregressive":[62],"ranking,":[63,196],"where":[64],"an":[65,80],"LLM":[66],"generates":[67],"permutation":[69],"over":[70],"fixed":[72],"candidate":[73,149],"set":[74],"conditioned":[75],"user":[77],"context":[78],"explicit":[81],"need":[82],"instruction.":[83],"However,":[84],"applying":[85],"RL":[86,129],"this":[88],"setting":[89],"faces":[90],"two":[91],"key":[92],"obstacles:":[93],"(i)":[94],"sequence-level":[95],"rewards":[96,164],"yield":[97],"coarse":[98],"credit":[99],"assignment":[100],"that":[101,131,156],"fails":[102],"provide":[104],"fine-grained":[105],"training":[106],"signals,":[107],"(ii)":[109],"interaction":[110],"feedback":[111],"is":[112],"sparse":[113,169],"noisy,":[115],"which":[116],"together":[117],"lead":[118],"inefficient":[120],"unstable":[122],"updates.":[123],"We":[124],"propose":[125],"FlexRec,":[126],"framework":[130],"addresses":[132],"both":[133],"issues":[134],"with":[135],"(1)":[136],"causally":[138],"grounded":[139],"item-level":[140],"reward":[141,159],"based":[142],"counterfactual":[144],"swaps":[145],"within":[146],"the":[147],"remaining":[148],"pool,":[150],"(2)":[152],"critic-guided,":[153],"uncertainty-aware":[154],"scaling":[155],"explicitly":[157],"models":[158],"uncertainty":[160],"down-weights":[162],"low-confidence":[163],"stabilize":[166],"learning":[167],"under":[168,205],"supervision.":[170],"Across":[171],"scenarios":[174],"objectives,":[176],"FlexRec":[177],"achieves":[178,199],"substantial":[179],"gains:":[180],"it":[181],"improves":[182],"NDCG@5":[183],"up":[185,191,200],"\\textbf{59\\%}":[187],"Recall@5":[189,203],"\\textbf{109.4\\%}":[193],"further":[198],"\\textbf{24.1\\%}":[202],"improvement":[204],"generalization":[206],"settings,":[207],"outperforming":[208],"LLM-based":[213],"baselines.":[214]},"counts_by_year":[],"updated_date":"2026-03-14T06:46:50.379900","created_date":"2026-03-14T00:00:00"}
