{"id":"https://openalex.org/W7137938785","doi":"https://doi.org/10.48550/arxiv.2603.15542","title":"InterveneBench: Benchmarking LLMs for Intervention Reasoning and Causal Study Design in Real Social Systems","display_name":"InterveneBench: Benchmarking LLMs for Intervention Reasoning and Causal Study Design in Real Social Systems","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7137938785","doi":"https://doi.org/10.48550/arxiv.2603.15542"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15542","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15542","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129727264","display_name":"Shaojie Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shi, Shaojie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660762","display_name":"Zhengyu Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Zhengyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129718872","display_name":"Lingran Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Lingran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704572","display_name":"Xinyu Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129714291","display_name":"Anna Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Anna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111301592","display_name":"Bohao Lv","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lv, Bohao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129652324","display_name":"Rui Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129669303","display_name":"Zijian Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100619469","display_name":"Zhichao Chen","orcid":"https://orcid.org/0000-0001-5415-170X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Zhichao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129641665","display_name":"Guolei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Guolei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091097798","display_name":"Naifu Zhang","orcid":"https://orcid.org/0000-0003-2586-3841"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Naifu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129678003","display_name":"Mingjian Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Mingjian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129746182","display_name":"Zhuo Quan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Quan, Zhuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129737692","display_name":"Bohao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Bohao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090685223","display_name":"Teqi Hao","orcid":"https://orcid.org/0009-0006-9510-8154"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Teqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129717217","display_name":"Yuan Qi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qi, Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129669294","display_name":"Yinghui Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yinghui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129741230","display_name":"Libo Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Libo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5129727264"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.33399999141693115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.33399999141693115,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10845","display_name":"Advanced Causal Inference Techniques","score":0.1534000039100647,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.11110000312328339,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/causal-inference","display_name":"Causal inference","score":0.6854000091552734},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.671500027179718},{"id":"https://openalex.org/keywords/causal-reasoning","display_name":"Causal reasoning","score":0.5853000283241272},{"id":"https://openalex.org/keywords/psychological-intervention","display_name":"Psychological intervention","score":0.5134999752044678},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4884999990463257},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.48840001225471497},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4797999858856201},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.47940000891685486},{"id":"https://openalex.org/keywords/intervention","display_name":"Intervention (counseling)","score":0.4047999978065491}],"concepts":[{"id":"https://openalex.org/C158600405","wikidata":"https://www.wikidata.org/wiki/Q5054566","display_name":"Causal inference","level":2,"score":0.6854000091552734},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.671500027179718},{"id":"https://openalex.org/C115086926","wikidata":"https://www.wikidata.org/wiki/Q17004651","display_name":"Causal reasoning","level":3,"score":0.5853000283241272},{"id":"https://openalex.org/C27415008","wikidata":"https://www.wikidata.org/wiki/Q7256382","display_name":"Psychological intervention","level":2,"score":0.5134999752044678},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.503000020980835},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4884999990463257},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.48840001225471497},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4797999858856201},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.47940000891685486},{"id":"https://openalex.org/C2780665704","wikidata":"https://www.wikidata.org/wiki/Q959298","display_name":"Intervention (counseling)","level":2,"score":0.4047999978065491},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.4036000072956085},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.34769999980926514},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.34130001068115234},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.33799999952316284},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33079999685287476},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3303999900817871},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.30889999866485596},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C166151441","wikidata":"https://www.wikidata.org/wiki/Q4923601","display_name":"Causation","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C179133015","wikidata":"https://www.wikidata.org/wiki/Q1639378","display_name":"Social system","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.27799999713897705},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C5570062","wikidata":"https://www.wikidata.org/wiki/Q3919817","display_name":"Behavioural sciences","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C44725695","wikidata":"https://www.wikidata.org/wiki/Q288156","display_name":"Normative","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.2603999972343445},{"id":"https://openalex.org/C126349790","wikidata":"https://www.wikidata.org/wiki/Q905036","display_name":"Computational sociology","level":2,"score":0.26030001044273376},{"id":"https://openalex.org/C190248442","wikidata":"https://www.wikidata.org/wiki/Q839486","display_name":"Qualitative research","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C64357122","wikidata":"https://www.wikidata.org/wiki/Q1149766","display_name":"Causality (physics)","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C2776325391","wikidata":"https://www.wikidata.org/wiki/Q6917865","display_name":"Motivated reasoning","level":3,"score":0.25600001215934753},{"id":"https://openalex.org/C87156501","wikidata":"https://www.wikidata.org/wiki/Q7268708","display_name":"Qualitative property","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15542","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15542","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15542","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Causal":[0],"inference":[1],"in":[2,12,39,45],"social":[3,41,52],"science":[4,53],"relies":[5],"on":[6],"end-to-end,":[7],"intervention-centered":[8],"research-design":[9],"reasoning":[10,38,112],"grounded":[11],"real-world":[13],"policy":[14,61,82],"interventions,":[15],"but":[16],"current":[17],"benchmarks":[18],"fail":[19],"to":[20,35,58,68],"evaluate":[21],"this":[22,92,96],"capability":[23],"of":[24],"large":[25],"language":[26],"models":[27,57],"(LLMs).":[28],"We":[29],"present":[30],"InterveneBench,":[31],"a":[32,101],"benchmark":[33],"designed":[34],"assess":[36],"such":[37],"realistic":[40],"settings.":[42],"Each":[43],"instance":[44],"InterveneBench":[46,75],"is":[47],"derived":[48],"from":[49],"an":[50],"empirical":[51],"study":[54],"and":[55,63,116],"requires":[56],"reason":[59],"about":[60],"interventions":[62],"identification":[64],"assumptions":[65],"without":[66],"access":[67],"predefined":[69],"causal":[70],"graphs":[71],"or":[72],"structural":[73],"equations.":[74],"comprises":[76],"744":[77],"peer-reviewed":[78],"studies":[79],"across":[80],"diverse":[81],"domains.":[83],"Experimental":[84],"results":[85],"show":[86],"that":[87],"state-of-the-art":[88,111],"LLMs":[89],"struggle":[90],"under":[91],"setting.":[93],"To":[94],"address":[95],"limitation,":[97],"we":[98],"further":[99],"propose":[100],"multi-agent":[102],"framework,":[103],"STRIDES.":[104],"It":[105],"achieves":[106],"significant":[107],"performance":[108],"improvements":[109],"over":[110],"models.":[113],"Our":[114],"code":[115],"data":[117],"are":[118],"available":[119],"at":[120],"https://github.com/Sii-yuning/STRIDES.":[121]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
