{"id":"https://openalex.org/W7162660478","doi":"https://doi.org/10.48550/arxiv.2605.27463","title":"When prompt perturbations break your A/B test: A valid statistical test for generative surveying","display_name":"When prompt perturbations break your A/B test: A valid statistical test for generative surveying","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162660478","doi":"https://doi.org/10.48550/arxiv.2605.27463"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.27463","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27463","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.27463","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135869967","display_name":"Hayden Helm","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Helm, Hayden","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135841308","display_name":"Carey Priebe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Priebe, Carey","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14074","display_name":"Persona Design and Applications","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14074","display_name":"Persona Design and Applications","score":0.9869999885559082,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12501","display_name":"Digital Economy and Work Transformation","score":0.000699999975040555,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.0005000000237487257,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.7669000029563904},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.6353999972343445},{"id":"https://openalex.org/keywords/wilcoxon-signed-rank-test","display_name":"Wilcoxon signed-rank test","score":0.5424000024795532},{"id":"https://openalex.org/keywords/resampling","display_name":"Resampling","score":0.5192000269889832},{"id":"https://openalex.org/keywords/permutation","display_name":"Permutation (music)","score":0.5030999779701233},{"id":"https://openalex.org/keywords/statistical-hypothesis-testing","display_name":"Statistical hypothesis testing","score":0.49810001254081726},{"id":"https://openalex.org/keywords/sign","display_name":"Sign (mathematics)","score":0.45719999074935913},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4480000138282776}],"concepts":[{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.7669000029563904},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.6353999972343445},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5792999863624573},{"id":"https://openalex.org/C206041023","wikidata":"https://www.wikidata.org/wiki/Q1751970","display_name":"Wilcoxon signed-rank test","level":3,"score":0.5424000024795532},{"id":"https://openalex.org/C150921843","wikidata":"https://www.wikidata.org/wiki/Q1170431","display_name":"Resampling","level":2,"score":0.5192000269889832},{"id":"https://openalex.org/C21308566","wikidata":"https://www.wikidata.org/wiki/Q7169365","display_name":"Permutation (music)","level":2,"score":0.5030999779701233},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.49810001254081726},{"id":"https://openalex.org/C139676723","wikidata":"https://www.wikidata.org/wiki/Q1193832","display_name":"Sign (mathematics)","level":2,"score":0.45719999074935913},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4480000138282776},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4456999897956848},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4032999873161316},{"id":"https://openalex.org/C96608239","wikidata":"https://www.wikidata.org/wiki/Q1199823","display_name":"Statistical power","level":2,"score":0.3725000023841858},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3675000071525574},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.3662000000476837},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33309999108314514},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.329800009727478},{"id":"https://openalex.org/C101454708","wikidata":"https://www.wikidata.org/wiki/Q17106019","display_name":"Standard Model (mathematical formulation)","level":3,"score":0.30160000920295715},{"id":"https://openalex.org/C2780069185","wikidata":"https://www.wikidata.org/wiki/Q7977945","display_name":"Equivalence (formal languages)","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.29260000586509705},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2831999957561493},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C177918212","wikidata":"https://www.wikidata.org/wiki/Q803623","display_name":"Perturbation (astronomy)","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2606000006198883},{"id":"https://openalex.org/C128942645","wikidata":"https://www.wikidata.org/wiki/Q1568346","display_name":"Test case","level":3,"score":0.2565999925136566},{"id":"https://openalex.org/C2986587452","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical analysis","level":2,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.27463","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27463","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.27463","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.27463","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Generative":[0],"surveying":[1,84,118],"--":[2,12],"where":[3],"collections":[4],"of":[5,127,155,164],"LLM-based":[6],"personas":[7],"provide":[8,135],"feedback":[9],"on":[10,43,138],"messages":[11],"has":[13],"emerged":[14],"as":[15],"a":[16,79,92,115],"cheap":[17],"and":[18,35,72,101,134,144,153],"scalable":[19],"alternative":[20],"to":[21,29,114,161],"traditional":[22],"market":[23],"research.":[24],"However,":[25],"LLMs":[26],"are":[27,76,159],"sensitive":[28,160],"small":[30],"variations":[31],"in":[32,56],"prompt":[33],"design":[34],"conclusions":[36],"drawn":[37],"from":[38],"generative":[39,83,117],"surveys":[40],"may":[41],"depend":[42],"arbitrary":[44],"phrasing":[45],"choices.":[46],"Controlling":[47],"for":[48,82],"this":[49,60,99],"sensitivity":[50],"requires":[51],"including":[52,68],"semantically":[53],"equivalent":[54],"perturbations":[55],"the":[57,69,104,125,128,151,156,162,168],"analysis.":[58],"In":[59],"paper,":[61],"we":[62,120,147],"show":[63,148],"that":[64,85,95,149],"standard":[65,108],"hypothesis":[66],"tests,":[67],"sign":[70],"test":[71,94,130],"Wilcoxon":[73],"signed-rank":[74],"test,":[75],"invalid":[77],"under":[78,98,106,131],"statistical":[80],"model":[81,100,170],"includes":[86],"realistic":[87,132],"perturbation":[88],"structure.":[89],"We":[90],"propose":[91],"permutation":[93,129],"is":[96],"valid":[97],"formally":[102],"characterize":[103,124],"conditions":[105],"which":[107],"tests":[109],"fail.":[110],"Applying":[111],"our":[112],"framework":[113],"simple":[116],"problem,":[119],"estimate":[121],"relevant":[122],"parameters,":[123],"power":[126],"conditions,":[133],"practical":[136],"guidance":[137],"budget":[139],"allocation":[140],"across":[141],"personas,":[142],"perturbations,":[143],"replicates.":[145],"Finally,":[146],"both":[150],"magnitude":[152],"direction":[154],"estimated":[157],"effect":[158],"choice":[163],"model,":[165],"even":[166],"within":[167],"same":[169],"family.":[171]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-29T00:00:00"}
