{"id":"https://openalex.org/W7138006850","doi":"https://doi.org/10.1609/aaai.v40i26.39348","title":"Simulated Rewards, Skewed Strategies: Tracing the Acquired Preference Bias in LLM-Based Dialogue Planners","display_name":"Simulated Rewards, Skewed Strategies: Tracing the Acquired Preference Bias in LLM-Based Dialogue Planners","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138006850","doi":"https://doi.org/10.1609/aaai.v40i26.39348"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i26.39348","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39348","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i26.39348","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129666090","display_name":"Heyan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Heyan Huang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129665636","display_name":"Yizhe Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yizhe Yang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129640823","display_name":"Huashan Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huashan Sun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129714021","display_name":"Jiawei Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiawei Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129645635","display_name":"Yang Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Gao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5129666090"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22685928,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"26","first_page":"21948","last_page":"21956"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.20059999823570251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.20059999823570251,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.19359999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.1444000005722046,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.7791000008583069},{"id":"https://openalex.org/keywords/preference-elicitation","display_name":"Preference elicitation","score":0.4399999976158142},{"id":"https://openalex.org/keywords/persona","display_name":"Persona","score":0.4041999876499176},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.3580999970436096},{"id":"https://openalex.org/keywords/tracing","display_name":"Tracing","score":0.33820000290870667}],"concepts":[{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.7791000008583069},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48840001225471497},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.4399999976158142},{"id":"https://openalex.org/C313442","wikidata":"https://www.wikidata.org/wiki/Q778556","display_name":"Persona","level":2,"score":0.4041999876499176},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.36000001430511475},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3580999970436096},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.34549999237060547},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32339999079704285},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.29190000891685486},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.2669000029563904}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i26.39348","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39348","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i26.39348","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39348","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"have":[3],"enabled":[4],"sophisticated":[5],"dialogue":[6,36,45,124],"planning":[7,41,136],"policy,":[8],"but":[9],"their":[10],"reliance":[11],"on":[12],"LLM-generated":[13],"simulation":[14,76,86],"and":[15,111,126,134],"feedback":[16],"for":[17,119,130],"policy":[18],"optimization":[19],"may":[20],"introduce":[21],"systematic":[22],"preference":[23,32,63,95],"bias.":[24],"We":[25],"present":[26],"the":[27,79],"first":[28],"comprehensive":[29],"analysis":[30],"of":[31,122],"bias":[33,81,96],"in":[34],"LLM-based":[35,123],"planners,":[37],"evaluating":[38],"four":[39],"state-of-the-art":[40],"policies":[42],"across":[43],"three":[44],"domains":[46],"using":[47],"multiple":[48],"LLM":[49],"families":[50],"at":[51],"varying":[52],"scales.":[53],"Our":[54,114],"investigation":[55],"reveals":[56],"that":[57,103],"all":[58],"tested":[59],"planners":[60,98],"exhibit":[61],"significant":[62],"bias,":[64],"systematically":[65],"favoring":[66],"narrow":[67],"strategy":[68],"sets":[69],"rather":[70],"than":[71],"maintaining":[72],"balanced":[73],"distributions.":[74],"User":[75],"emerges":[77],"as":[78,88],"primary":[80],"driver,":[82],"while":[83,107],"diverse":[84],"persona":[85],"fails":[87],"an":[89],"effective":[90],"mitigation":[91],"strategy.":[92],"Most":[93],"concerning,":[94],"drives":[97],"toward":[99],"ethically":[100],"problematic":[101],"strategies":[102],"achieve":[104],"short-term":[105],"success":[106],"undermining":[108],"real-world":[109],"effectiveness":[110],"ethical":[112],"standards.":[113],"findings":[115],"establish":[116],"fundamental":[117],"challenges":[118],"responsible":[120],"deployment":[121],"systems":[125],"provide":[127],"crucial":[128],"insights":[129],"developing":[131],"more":[132],"reliable":[133],"ethically-aligned":[135],"approaches.":[137]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
