{"id":"https://openalex.org/W7161175114","doi":"https://doi.org/10.48550/arxiv.2605.12645","title":"Training LLMs with Reinforcement Learning for Intent-Aware Personalized Question Answering","display_name":"Training LLMs with Reinforcement Learning for Intent-Aware Personalized Question Answering","publication_year":2026,"publication_date":"2026-05-12","ids":{"openalex":"https://openalex.org/W7161175114","doi":"https://doi.org/10.48550/arxiv.2605.12645"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.12645","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12645","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.12645","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136149344","display_name":"Maryam Amirizaniani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amirizaniani, Maryam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038583917","display_name":"Benjamin Charles Germain Lee","orcid":"https://orcid.org/0000-0002-1677-6386"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Benjamin Charles Germain","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136111819","display_name":"Jevin West","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"West, Jevin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136187115","display_name":"Nicholas Weber","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weber, Nicholas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8779000043869019,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8779000043869019,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.05990000069141388,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.00989999994635582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.7235999703407288},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.642300009727478},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6090999841690063},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5299000144004822},{"id":"https://openalex.org/keywords/user-modeling","display_name":"User modeling","score":0.4909999966621399},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4099000096321106},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.40560001134872437}],"concepts":[{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.7235999703407288},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6610999703407288},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.642300009727478},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6090999841690063},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5299000144004822},{"id":"https://openalex.org/C67712803","wikidata":"https://www.wikidata.org/wiki/Q7901853","display_name":"User modeling","level":3,"score":0.4909999966621399},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4415000081062317},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4099000096321106},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.40560001134872437},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.38749998807907104},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.350600004196167},{"id":"https://openalex.org/C2777379011","wikidata":"https://www.wikidata.org/wiki/Q938545","display_name":"Implicit learning","level":3,"score":0.33709999918937683},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2930999994277954},{"id":"https://openalex.org/C142039133","wikidata":"https://www.wikidata.org/wiki/Q3620943","display_name":"Personalized learning","level":5,"score":0.2840999960899353},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2768999934196472},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.12645","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12645","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.12645","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12645","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Effective":[0],"personalized":[1,130],"question":[2,107],"answering":[3],"(PQA)":[4],"in":[5,11,60],"language":[6],"models":[7,97],"requires":[8],"grounding":[9],"responses":[10,145],"the":[12,20,53,64,77,150,157,177,187],"user's":[13,65,151],"underlying":[14,152],"intent,":[15],"where":[16,63],"intent":[17,51,102,141,185],"refers":[18],"to":[19,33,98],"implicit":[21,100,139,183],"``why''":[22],"behind":[23],"a":[24,91,105,115,129,191],"query":[25],"beyond":[26],"its":[27],"explicit":[28,142],"wording.":[29],"However,":[30],"existing":[31],"approaches":[32],"intent-aware":[34,125],"personalization":[35],"rely":[36],"on":[37,156],"multi-turn":[38],"conversational":[39],"context":[40],"or":[41],"rich":[42],"user":[43,50,101,140,184],"profiles,":[44],"and":[45,74,79,108,143],"do":[46],"not":[47],"explicitly":[48],"model":[49],"during":[52],"reasoning":[54,80],"process.":[55,81],"This":[56],"limits":[57],"their":[58],"effectiveness":[59],"single-turn":[61,106],"settings,":[62],"latent":[66],"goal":[67],"must":[68],"be":[69],"inferred":[70],"from":[71,104],"minimal":[72],"input":[73],"integrated":[75],"into":[76,111],"thinking":[78,112],"To":[82],"bridge":[83],"this":[84],"gap,":[85],"we":[86],"propose":[87],"IAP":[88,133,163],"(Intent-Aware":[89],"Personalization),":[90],"reinforcement":[92],"learning":[93],"framework":[94],"that":[95,137,146,181],"trains":[96],"infer":[99],"directly":[103],"incorporate":[109],"it":[110],"steps":[113],"through":[114],"tag-based":[116],"schema":[117],"for":[118,194],"generating":[119],"personalized,":[120],"intent-grounded":[121],"answers.":[122],"By":[123],"optimizing":[124],"answer":[126],"trajectories":[127],"under":[128],"reward":[131],"function,":[132],"reinforces":[134],"generation":[135],"paths":[136],"make":[138],"produce":[144],"better":[147],"align":[148],"with":[149],"goal.":[153],"Through":[154],"experiments":[155],"LaMP-QA":[158],"benchmark":[159],"across":[160],"six":[161],"models,":[162],"consistently":[164],"outperforms":[165],"all":[166],"baselines,":[167],"achieving":[168],"an":[169],"average":[170],"macro-score":[171],"gain":[172],"of":[173],"around":[174],"7.5\\%":[175],"over":[176],"strongest":[178],"competitor,":[179],"demonstrating":[180],"modeling":[182],"within":[186],"training":[188],"objective":[189],"is":[190],"promising":[192],"direction":[193],"PQA.":[195]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-15T00:00:00"}
