{"id":"https://openalex.org/W7134844201","doi":"https://doi.org/10.48550/arxiv.2603.08412","title":"Aligning to Illusions: Choice Blindness in Human and AI Feedback","display_name":"Aligning to Illusions: Choice Blindness in Human and AI Feedback","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134844201","doi":"https://doi.org/10.48550/arxiv.2603.08412"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08412","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128637457","display_name":"Wenbin Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wu, Wenbin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5128637457"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.15139999985694885,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.15139999985694885,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.1436000019311905,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.10180000215768814,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/blindness","display_name":"Blindness","score":0.652999997138977},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.585099995136261},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5692999958992004},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.5547000169754028},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.49230000376701355},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.40779998898506165},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.373199999332428},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3422999978065491}],"concepts":[{"id":"https://openalex.org/C2780929884","wikidata":"https://www.wikidata.org/wiki/Q737460","display_name":"Blindness","level":2,"score":0.652999997138977},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.585099995136261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.571399986743927},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5692999958992004},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.5547000169754028},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5029000043869019},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.49230000376701355},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4724000096321106},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4544000029563904},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4194999933242798},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.373199999332428},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.36489999294281006},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3422999978065491},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C181204326","wikidata":"https://www.wikidata.org/wiki/Q7239820","display_name":"Preference learning","level":3,"score":0.289900004863739},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C122770356","wikidata":"https://www.wikidata.org/wiki/Q1656753","display_name":"Identifiability","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2750999927520752},{"id":"https://openalex.org/C15123163","wikidata":"https://www.wikidata.org/wiki/Q500096","display_name":"Psychophysics","level":3,"score":0.2694000005722046},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.26589998602867126},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.26330000162124634}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08412","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08412","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08412","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08412","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7798884510993958}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"from":[2,68,74,93],"Human":[3],"Feedback":[4],"(RLHF)":[5],"assumes":[6],"annotator":[7],"preferences":[8,33],"reflect":[9],"stable":[10],"internal":[11],"states.":[12],"We":[13],"challenge":[14],"this":[15,122],"through":[16],"three":[17],"experiments":[18],"spanning":[19],"the":[20,107,140,155],"preference":[21,152],"pipeline.":[22],"In":[23,86],"a":[24,87,151],"human":[25,168],"choice":[26,37],"blindness":[27,38,71],"study,":[28],"91%":[29],"of":[30,43,101],"surreptitiously":[31],"swapped":[32],"go":[34],"undetected,":[35],"extending":[36],"to":[39,72,76,95,99,124],"third-person":[40],"evaluative":[41],"comparison":[42],"unfamiliar":[44],"text.":[45],"Testing":[46],"fifteen":[47],"LLM":[48,170],"judges":[49],"as":[50],"potential":[51],"replacements,":[52],"we":[53],"find":[54],"detection":[55],"relies":[56],"on":[57],"shallow":[58],"text":[59],"matching":[60],"rather":[61],"than":[62],"genuine":[63],"self-monitoring:":[64],"removing":[65],"prior":[66],"reasoning":[67],"context":[69,163],"causes":[70],"surge":[73],"near-zero":[75],"over":[77,136],"50%,":[78],"while":[79,139],"explicit":[80],"social":[81],"pressure":[82],"induces":[83],"near-universal":[84],"compliance.":[85],"dose-response":[88],"experiment":[89],"across":[90],"two":[91],"architectures":[92],"86M":[94],"2B":[96],"parameters,":[97],"one-sixth":[98],"one-third":[100],"labels":[102],"must":[103],"be":[104],"corrupted":[105],"before":[106],"reward":[108],"signal":[109,156],"halves,":[110],"yet":[111],"standard":[112,173],"pairwise":[113],"accuracy":[114],"remains":[115],"virtually":[116],"unchanged.":[117],"A":[118],"Best-of-N":[119],"evaluation":[120,174],"confirms":[121],"translates":[123],"downstream":[125],"policy":[126],"degradation:":[127],"at":[128],"50%":[129],"corruption,":[130],"reward-guided":[131],"selection":[132],"produces":[133],"no":[134],"improvement":[135],"random":[137],"sampling,":[138],"proxy":[141],"model":[142],"reports":[143],"monotonically":[144],"increasing":[145],"scores.":[146],"Together,":[147],"these":[148],"results":[149],"reveal":[150],"construction":[153],"problem:":[154],"entering":[157],"RLHF":[158],"is":[159],"shaped":[160],"by":[161],"elicitation":[162],"in":[164],"ways":[165],"that":[166],"neither":[167],"metacognition,":[169],"self-monitoring,":[171],"nor":[172],"metrics":[175],"can":[176],"detect.":[177]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
