{"id":"https://openalex.org/W4412673539","doi":"https://doi.org/10.1145/3731120.3744598","title":"Response Generation through Social Reasoning in Large Language Models with Direct Diverse Preferences Optimization","display_name":"Response Generation through Social Reasoning in Large Language Models with Direct Diverse Preferences Optimization","publication_year":2025,"publication_date":"2025-07-18","ids":{"openalex":"https://openalex.org/W4412673539","doi":"https://doi.org/10.1145/3731120.3744598"},"language":"en","primary_location":{"id":"doi:10.1145/3731120.3744598","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3731120.3744598","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023630015","display_name":"Maryam Amirizaniani","orcid":"https://orcid.org/0000-0002-6142-0637"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Maryam Amirizaniani","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"raw_orcid":"https://orcid.org/0000-0002-6142-0637","affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113247992","display_name":"Elias Martin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210138624","display_name":"University of Washington Bothell","ror":"https://ror.org/02ygzhr13","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701","https://openalex.org/I4210138624"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elias Martin","raw_affiliation_strings":["University of Washington - Bothell, Bothell, WA, USA"],"raw_orcid":"https://orcid.org/0009-0008-3178-1408","affiliations":[{"raw_affiliation_string":"University of Washington - Bothell, Bothell, WA, USA","institution_ids":["https://openalex.org/I4210138624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061449471","display_name":"Afra Mashhadi","orcid":"https://orcid.org/0000-0003-4631-4438"},"institutions":[{"id":"https://openalex.org/I4210138624","display_name":"University of Washington Bothell","ror":"https://ror.org/02ygzhr13","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701","https://openalex.org/I4210138624"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Afra Mashhadi","raw_affiliation_strings":["University of Washington - Bothell, Bothell, WA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4631-4438","affiliations":[{"raw_affiliation_string":"University of Washington - Bothell, Bothell, WA, USA","institution_ids":["https://openalex.org/I4210138624"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061319881","display_name":"Chirag Shah","orcid":"https://orcid.org/0000-0002-3797-4293"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chirag Shah","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"raw_orcid":"https://orcid.org/0000-0002-3797-4293","affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5023630015"],"corresponding_institution_ids":["https://openalex.org/I201448701"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08661441,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"315","last_page":"325"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9847000241279602,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6814746856689453}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6814746856689453}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731120.3744598","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3731120.3744598","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 International ACM SIGIR Conference on Innovative Concepts and Theories in Information Retrieval (ICTIR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6299999952316284,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W43928053","https://openalex.org/W2017835493","https://openalex.org/W2234786245","https://openalex.org/W2512715634","https://openalex.org/W2763110165","https://openalex.org/W2970062726","https://openalex.org/W3158607268","https://openalex.org/W4385567216","https://openalex.org/W4390599914","https://openalex.org/W4390961181","https://openalex.org/W4391591751","https://openalex.org/W4392384650","https://openalex.org/W4393146968","https://openalex.org/W4393160410","https://openalex.org/W4396832610","https://openalex.org/W4396833393","https://openalex.org/W4399300217","https://openalex.org/W4401042684","https://openalex.org/W4401042987","https://openalex.org/W4402670094","https://openalex.org/W4402670301","https://openalex.org/W4402671303","https://openalex.org/W4402671561","https://openalex.org/W4402671807","https://openalex.org/W4403577434","https://openalex.org/W4403582472","https://openalex.org/W4407765460","https://openalex.org/W4407953093","https://openalex.org/W4409364742","https://openalex.org/W4411113384","https://openalex.org/W7042815489"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"demonstrated":[5],"capabilities":[6],"across":[7],"a":[8,89],"wide":[9],"range":[10],"of":[11,63,101,119,127,136,178,198,221],"information":[12],"retrieval":[13],"(IR)":[14],"tasks,":[15],"including":[16],"generating":[17,189],"reasoning-based":[18,181],"responses":[19,108,122,147,190],"for":[20,109],"social":[21,110,162,215],"questions.":[22],"A":[23],"common":[24],"approach":[25],"to":[26,49,56,115,144,217],"enhancing":[27],"these":[28],"abilities":[29],"involves":[30],"optimizing":[31],"model":[32],"behavior":[33,96],"based":[34],"on":[35,160],"human":[36,64,137,155,194,210],"preferences":[37],"through":[38,206],"an":[39,175],"implicit":[40],"reward":[41],"function.":[42],"However,":[43],"existing":[44],"frameworks":[45],"are":[46,149],"often":[47],"limited":[48],"binary":[50],"win-lose":[51],"output":[52],"optimization,":[53],"which":[54],"fails":[55],"capture":[57],"the":[58,117,125,133,219],"nuanced":[59],"and":[60,74,105,167,208],"diverse":[61,209],"nature":[62,135],"preferences,":[65,211],"especially":[66],"in":[67,180,188,202],"socially":[68,222],"complex":[69],"scenarios":[70],"that":[71,92,148,191],"involve":[72],"trade-offs":[73],"multiple":[75],"valid":[76],"viewpoints.":[77],"To":[78],"address":[79],"this":[80,199],"limitation,":[81],"we":[82],"propose":[83],"Direct":[84],"Diverse":[85],"Preference":[86],"Optimization":[87],"(DDPO),":[88],"novel":[90],"framework":[91],"models":[93],"user":[94,204],"preference":[95],"by":[97],"leveraging":[98,203],"ranked":[99,207],"sets":[100],"both":[102],"preferred":[103],"(win)":[104],"non-preferred":[106],"(loss)":[107],"reasoning":[111,146,224],"task.":[112],"DDPO":[113,159],"aims":[114],"increase":[116],"probability":[118,126],"higher-ranked":[120],"win":[121],"while":[123],"decreasing":[124],"lower-ranked":[128],"loss":[129],"responses,":[130],"effectively":[131],"capturing":[132],"graded":[134],"preferences.":[138,156],"This":[139],"rank-aware":[140],"optimization":[141],"enables":[142],"LLMs":[143],"generate":[145],"more":[150],"closely":[151],"aligned":[152],"with":[153],"varied":[154],"We":[157],"evaluate":[158],"real-world":[161,214],"questions":[163],"collected":[164],"from":[165,213],"Reddit":[166],"Lemmy":[168],"using":[169],"two":[170],"LLMs.":[171],"The":[172,196],"results":[173],"show":[174],"average":[176],"improvement":[177],"8.7%":[179],"logical":[182],"inference":[183],"metrics,":[184],"demonstrating":[185],"its":[186],"effectiveness":[187],"better":[192],"reflect":[193],"expectations.":[195],"novelty":[197],"work":[200],"lies":[201],"modeling":[205],"derived":[212],"platforms,":[216],"guide":[218],"generation":[220],"grounded":[223],"responses.":[225]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
