{"id":"https://openalex.org/W4417115574","doi":"https://doi.org/10.48550/arxiv.2510.08758","title":"A Design-based Solution for Causal Inference with Text: Can a Language Model Be Too Large?","display_name":"A Design-based Solution for Causal Inference with Text: Can a Language Model Be Too Large?","publication_year":2025,"publication_date":"2025-10-09","ids":{"openalex":"https://openalex.org/W4417115574","doi":"https://doi.org/10.48550/arxiv.2510.08758"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2510.08758","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08758","pdf_url":"https://arxiv.org/pdf/2510.08758","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.08758","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049911369","display_name":"Graham Tierney","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tierney, Graham","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092950185","display_name":"Srikar Katta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katta, Srikar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023997241","display_name":"Christopher A. Bail","orcid":"https://orcid.org/0000-0002-5310-2372"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bail, Christopher","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080997128","display_name":"Sunshine Hillygus","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hillygus, Sunshine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5056559023","display_name":"Alexander Volfovsky","orcid":"https://orcid.org/0000-0003-4462-1020"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Volfovsky, Alexander","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5049911369"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.21289999783039093,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.21289999783039093,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.12950000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.6863999962806702},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.5938000082969666},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4742000102996826},{"id":"https://openalex.org/keywords/affect","display_name":"Affect (linguistics)","score":0.45559999346733093},{"id":"https://openalex.org/keywords/humility","display_name":"Humility","score":0.4438000023365021},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.43389999866485596},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.427700012922287},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.4065000116825104},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.398499995470047},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.37959998846054077}],"concepts":[{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.6863999962806702},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.5938000082969666},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5662999749183655},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5658000111579895},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.47429999709129333},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4742000102996826},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4603999853134155},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.45559999346733093},{"id":"https://openalex.org/C2778456462","wikidata":"https://www.wikidata.org/wiki/Q1186677","display_name":"Humility","level":2,"score":0.4438000023365021},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.43389999866485596},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.427700012922287},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.4065000116825104},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C90329073","wikidata":"https://www.wikidata.org/wiki/Q914232","display_name":"Ask price","level":2,"score":0.3612000048160553},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.3492000102996826},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.3456999957561493},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.3416000008583069},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3409999907016754},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3273000121116638},{"id":"https://openalex.org/C79416737","wikidata":"https://www.wikidata.org/wiki/Q2305519","display_name":"Social learning","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.31520000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3122999966144562},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C131158328","wikidata":"https://www.wikidata.org/wiki/Q1307337","display_name":"Social influence","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C74672266","wikidata":"https://www.wikidata.org/wiki/Q815859","display_name":"Language acquisition","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2671999931335449},{"id":"https://openalex.org/C84389358","wikidata":"https://www.wikidata.org/wiki/Q1129466","display_name":"Discourse analysis","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2651999890804291},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2644999921321869},{"id":"https://openalex.org/C64357122","wikidata":"https://www.wikidata.org/wiki/Q1149766","display_name":"Causality (physics)","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C65965080","wikidata":"https://www.wikidata.org/wiki/Q1806885","display_name":"Latent variable model","level":3,"score":0.2581000030040741},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2510.08758","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08758","pdf_url":"https://arxiv.org/pdf/2510.08758","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2510.08758","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.08758","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.08758","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.08758","pdf_url":"https://arxiv.org/pdf/2510.08758","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Many":[0],"social":[1,172,178],"science":[2],"questions":[3],"ask":[4],"how":[5],"linguistic":[6],"properties":[7,17],"causally":[8],"affect":[9],"an":[10,116],"audience's":[11],"attitudes":[12],"and":[13,57,106,144,177],"behaviors.":[14],"Because":[15],"text":[16,51,143],"are":[18],"often":[19],"interlinked":[20],"(e.g.,":[21],"angry":[22],"reviews":[23],"use":[24],"profane":[25],"language),":[26],"we":[27,92,128,150],"must":[28],"control":[29],"for":[30,171],"possible":[31],"latent":[32,48,100],"confounding":[33],"to":[34,46],"isolate":[35,151],"causal":[36,153],"effects.":[37,110],"Recent":[38],"literature":[39],"proposes":[40],"adapting":[41],"large":[42],"language":[43],"models":[44,139],"(LLMs)":[45],"learn":[47],"representations":[49,76],"of":[50,67,121,155,162],"that":[52,77,98,130],"successfully":[53],"predict":[54],"both":[55],"treatment":[56,63,81,109],"the":[58,62,68,80,103,119,152,159],"outcome.":[59],"However,":[60],"because":[61],"is":[64],"a":[65,94],"component":[66],"text,":[69],"these":[70],"deep":[71],"learning":[72,75],"methods":[73,132],"risk":[74],"actually":[78],"encode":[79],"itself,":[82],"inducing":[83],"overlap":[84,104],"bias.":[85],"Rather":[86],"than":[87,135],"depending":[88],"on":[89,158,168],"post-hoc":[90],"adjustments,":[91],"introduce":[93],"new":[95,166],"experimental":[96],"design":[97,114],"handles":[99],"confounding,":[101],"avoids":[102],"issue,":[105],"unbiasedly":[107],"estimates":[108],"We":[111],"apply":[112],"this":[113],"in":[115,124],"experiment":[117],"evaluating":[118],"persuasiveness":[120,161],"expressing":[122,156],"humility":[123,157],"political":[125,163],"communication.":[126],"Methodologically,":[127],"demonstrate":[129],"LLM-based":[131],"perform":[133],"worse":[134],"even":[136],"simple":[137],"bag-of-words":[138],"using":[140],"our":[141,147],"real":[142],"outcomes":[145],"from":[146],"experiment.":[148],"Substantively,":[149],"effect":[154],"perceived":[160],"statements,":[164],"offering":[165],"insights":[167],"communication":[169],"effects":[170],"media":[173],"platforms,":[174],"policy":[175],"makers,":[176],"scientists.":[179]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-14T00:00:00"}
