{"id":"https://openalex.org/W7136164870","doi":"https://doi.org/10.48550/arxiv.2603.12273","title":"Aligning Language Models from User Interactions","display_name":"Aligning Language Models from User Interactions","publication_year":2026,"publication_date":"2026-02-18","ids":{"openalex":"https://openalex.org/W7136164870","doi":"https://doi.org/10.48550/arxiv.2603.12273"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12273","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026228535","display_name":"Thomas Kleine Buening","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Buening, Thomas Kleine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082779019","display_name":"Jonas H\u00fcbotter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"H\u00fcbotter, Jonas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127861793","display_name":"Barna P\u00e1sztor","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"P\u00e1sztor, Barna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129538381","display_name":"Idan Shenfeld","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shenfeld, Idan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048076721","display_name":"Giorgia Ramponi","orcid":"https://orcid.org/0000-0002-3076-5448"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ramponi, Giorgia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129543950","display_name":"Andreas Krause","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krause, Andreas","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5026228535"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.24230000376701355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.24230000376701355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.06669999659061432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.057999998331069946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6765999794006348},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6435999870300293},{"id":"https://openalex.org/keywords/hindsight-bias","display_name":"Hindsight bias","score":0.5460000038146973},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5322999954223633},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4530999958515167},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.4307999908924103},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.39640000462532043},{"id":"https://openalex.org/keywords/user-modeling","display_name":"User modeling","score":0.3905999958515167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8345000147819519},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6765999794006348},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6435999870300293},{"id":"https://openalex.org/C10347200","wikidata":"https://www.wikidata.org/wiki/Q1960297","display_name":"Hindsight bias","level":2,"score":0.5460000038146973},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5322999954223633},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.47440001368522644},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46050000190734863},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4530999958515167},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.4307999908924103},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.39640000462532043},{"id":"https://openalex.org/C67712803","wikidata":"https://www.wikidata.org/wiki/Q7901853","display_name":"User modeling","level":3,"score":0.3905999958515167},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32350000739097595},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2752000093460083},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C201025465","wikidata":"https://www.wikidata.org/wiki/Q11248500","display_name":"User experience design","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5576553344726562,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-turn":[0],"user":[1,32,98,156,196],"interactions":[2,26,99,197],"are":[3,57],"among":[4],"the":[5,51,73,104,107,113,118,127,132,146],"most":[6],"abundant":[7],"data":[8],"produced":[9],"by":[10],"language":[11,55,161],"models,":[12],"yet":[13],"we":[14,121,150],"lack":[15],"effective":[16],"methods":[17],"to":[18,42,60,79,87,180,183],"learn":[19],"from":[20,97,158],"them.":[21],"While":[22],"typically":[23],"discarded,":[24],"these":[25],"often":[27,77],"contain":[28],"useful":[29],"information:":[30],"follow-up":[31,109],"messages":[33],"may":[34],"indicate":[35],"that":[36,129,152,194,198],"a":[37,70,89,123],"response":[38],"was":[39],"incorrect,":[40],"failed":[41],"follow":[43],"an":[44],"instruction,":[45],"or":[46],"did":[47],"not":[48],"align":[49],"with":[50,117],"user's":[52,71,108],"preferences.":[53],"Importantly,":[54],"models":[56,162,179],"already":[58],"able":[59,78],"make":[61],"use":[62],"of":[63],"this":[64,85,141],"information":[65],"in":[66,136],"context.":[67],"After":[68],"observing":[69],"follow-up,":[72],"same":[74,174],"model":[75,105],"is":[76],"revise":[80],"its":[81],"behavior.":[82],"We":[83,138],"leverage":[84],"ability":[86],"propose":[88],"principled":[90],"and":[91,111,166,206],"scalable":[92],"method":[93],"for":[94,125],"learning":[95],"directly":[96],"through":[100,186],"self-distillation.":[101],"By":[102],"conditioning":[103],"on":[106,154],"message":[110],"comparing":[112],"resulting":[114],"token":[115],"distribution":[116,143],"original":[119],"policy,":[120],"obtain":[122],"target":[124],"updating":[126],"policy":[128],"captures":[130],"how":[131],"model's":[133],"behavior":[134],"changes":[135],"hindsight.":[137],"then":[139],"distill":[140],"hindsight":[142],"back":[144],"into":[145],"current":[147],"policy.":[148],"Remarkably,":[149],"show":[151],"training":[153],"real-world":[155],"conversations":[157],"WildChat":[159],"improves":[160],"across":[163],"standard":[164],"alignment":[165],"instruction-following":[167],"benchmarks,":[168],"without":[169,188],"regressing":[170],"other":[171],"capabilities.":[172],"The":[173],"mechanism":[175],"enables":[176],"personalization,":[177,205],"allowing":[178],"continually":[181],"adapt":[182],"individual":[184],"users":[185],"interaction":[187],"explicit":[189],"feedback.":[190],"Our":[191],"results":[192],"demonstrate":[193],"raw":[195],"arise":[199],"naturally":[200],"during":[201],"deployment":[202],"enable":[203],"alignment,":[204],"continual":[207],"adaptation.":[208]},"counts_by_year":[],"updated_date":"2026-03-17T07:05:13.627479","created_date":"2026-03-17T00:00:00"}
