{"id":"https://openalex.org/W7130687680","doi":"https://doi.org/10.48550/arxiv.2602.17262","title":"Quantifying and Mitigating Socially Desirable Responding in LLMs: A Desirability-Matched Graded Forced-Choice Psychometric Study","display_name":"Quantifying and Mitigating Socially Desirable Responding in LLMs: A Desirability-Matched Graded Forced-Choice Psychometric Study","publication_year":2026,"publication_date":"2026-02-19","ids":{"openalex":"https://openalex.org/W7130687680","doi":"https://doi.org/10.48550/arxiv.2602.17262"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.17262","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126506393","display_name":"Kensuke Okada","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Okada, Kensuke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058625210","display_name":"Yui Furukawa","orcid":"https://orcid.org/0000-0001-6357-988X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Furukawa, Yui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5079169279","display_name":"Kyosuke Bunji","orcid":"https://orcid.org/0000-0002-9512-7439"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bunji, Kyosuke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5126506393"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14074","display_name":"Persona Design and Applications","score":0.3596999943256378,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14074","display_name":"Persona Design and Applications","score":0.3596999943256378,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0820000022649765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.05810000002384186,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7771999835968018},{"id":"https://openalex.org/keywords/persona","display_name":"Persona","score":0.7581999897956848},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.7081000208854675},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6137999892234802},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5673999786376953},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5199999809265137},{"id":"https://openalex.org/keywords/item-response-theory","display_name":"Item response theory","score":0.48559999465942383},{"id":"https://openalex.org/keywords/construct-validity","display_name":"Construct validity","score":0.4650999903678894}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7771999835968018},{"id":"https://openalex.org/C313442","wikidata":"https://www.wikidata.org/wiki/Q778556","display_name":"Persona","level":2,"score":0.7581999897956848},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.7081000208854675},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6137999892234802},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.607200026512146},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5673999786376953},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5199999809265137},{"id":"https://openalex.org/C19875794","wikidata":"https://www.wikidata.org/wiki/Q1207340","display_name":"Item response theory","level":3,"score":0.48559999465942383},{"id":"https://openalex.org/C49453240","wikidata":"https://www.wikidata.org/wiki/Q1592163","display_name":"Construct validity","level":3,"score":0.4650999903678894},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.44909998774528503},{"id":"https://openalex.org/C171606756","wikidata":"https://www.wikidata.org/wiki/Q506132","display_name":"Psychometrics","level":2,"score":0.44130000472068787},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.4260999858379364},{"id":"https://openalex.org/C141261163","wikidata":"https://www.wikidata.org/wiki/Q218600","display_name":"Corporate social responsibility","level":2,"score":0.4122999906539917},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3871000111103058},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3662000000476837},{"id":"https://openalex.org/C2776367032","wikidata":"https://www.wikidata.org/wiki/Q727558","display_name":"Social desirability bias","level":3,"score":0.32679998874664307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29899999499320984},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C2909622525","wikidata":"https://www.wikidata.org/wiki/Q506132","display_name":"Psychometric testing","level":4,"score":0.28220000863075256},{"id":"https://openalex.org/C3018868096","wikidata":"https://www.wikidata.org/wiki/Q2693233","display_name":"Internal consistency","level":3,"score":0.2750999927520752},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27309998869895935},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2605000138282776},{"id":"https://openalex.org/C54998920","wikidata":"https://www.wikidata.org/wiki/Q16255102","display_name":"Self-report study","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C116211729","wikidata":"https://www.wikidata.org/wiki/Q5372350","display_name":"Test validity","level":3,"score":0.2526000142097473}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.17262","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:share.osf.io:b05f727e-0a65-4916-acaf-54f601839cc5","is_oa":false,"landing_page_url":"https://osf.io/2e6ny","pdf_url":null,"source":{"id":"https://openalex.org/S4306401127","display_name":"OSF Preprints (OSF Preprints)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2799848540","host_organization_name":"Center for Open Science","host_organization_lineage":["https://openalex.org/I2799848540"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Project"},{"id":"doi:10.48550/arxiv.2602.17262","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.17262","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.17262","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5599939227104187,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Human":[0],"self-report":[1],"questionnaires":[2,151],"are":[3],"increasingly":[4],"used":[5],"in":[6,30,62],"NLP":[7],"to":[8,19,57,135],"benchmark":[9],"and":[10,21,49,59,80,102,179,187],"audit":[11],"large":[12,154],"language":[13],"models":[14],"(LLMs),":[15],"from":[16,90,128],"persona":[17,170],"consistency":[18],"safety":[20],"bias":[22],"assessments.":[23],"Yet":[24],"these":[25],"instruments":[26],"presume":[27],"honest":[28],"responding;":[29],"evaluative":[31],"contexts,":[32],"LLMs":[33,141],"can":[34],"instead":[35],"gravitate":[36],"toward":[37],"socially":[38,43],"preferred":[39],"answers-a":[40],"form":[41],"of":[42,65,167,189],"desirable":[44],"responding":[45],"(SDR)-biasing":[46],"questionnaire-derived":[47],"scores":[48],"downstream":[50],"conclusions.":[51],"We":[52],"propose":[53],"a":[54,85,116,175],"psychometric":[55],"framework":[56],"quantify":[58,68],"mitigate":[60],"SDR":[61,81,161],"questionnaire-based":[63,185],"evaluation":[64],"LLMs.":[66,190],"To":[67],"SDR,":[69,155],"the":[70,165,168],"same":[71],"inventory":[72,122],"is":[73,82],"administered":[74],"under":[75],"HONEST":[76],"versus":[77],"FAKE-GOOD":[78],"instructions,":[79],"computed":[83],"as":[84,105,107],"direction-corrected":[86],"standardized":[87],"effect":[88],"size":[89],"item":[91,130],"response":[92,103],"theory":[93],"(IRT)-estimated":[94],"latent":[95],"scores.":[96],"This":[97],"enables":[98],"comparisons":[99],"across":[100],"constructs":[101],"formats,":[104],"well":[106],"against":[108],"human":[109],"instructed-faking":[110],"benchmarks.":[111],"For":[112],"mitigation,":[113],"we":[114],"construct":[115],"graded":[117],"forced-choice":[118],"(GFC)":[119],"Big":[120],"Five":[121],"by":[123],"selecting":[124],"30":[125],"cross-domain":[126],"pairs":[127],"an":[129],"pool":[131],"via":[132],"constrained":[133],"optimization":[134],"match":[136],"desirability.":[137],"Across":[138],"nine":[139],"instruction-following":[140],"evaluated":[142],"on":[143],"synthetic":[144],"personas":[145],"with":[146],"known":[147],"target":[148],"profiles,":[149],"Likert-style":[150],"show":[152],"consistently":[153],"whereas":[156],"desirability-matched":[157],"GFC":[158],"substantially":[159],"attenuates":[160],"while":[162],"largely":[163],"preserving":[164],"recovery":[166],"intended":[169],"profiles.":[171],"These":[172],"results":[173],"highlight":[174],"model-dependent":[176],"SDR-recovery":[177],"trade-off":[178],"motivate":[180],"SDR-aware":[181],"reporting":[182],"practices":[183],"for":[184],"benchmarking":[186],"auditing":[188]},"counts_by_year":[],"updated_date":"2026-04-30T06:05:26.967640","created_date":"2026-02-21T00:00:00"}
