{"id":"https://openalex.org/W7138015695","doi":"https://doi.org/10.1609/aaai.v40i38.40540","title":"DSCodeBench: A Realistic Benchmark for Data Science Code Generation","display_name":"DSCodeBench: A Realistic Benchmark for Data Science Code Generation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138015695","doi":"https://doi.org/10.1609/aaai.v40i38.40540"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v40i38.40540","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40540","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40540/44501","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40540/44501","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129745369","display_name":"Shuyin Ouyang","orcid":null},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Shuyin Ouyang","raw_affiliation_strings":["King's College London"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"King's College London","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129668599","display_name":"Dong HUANG","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dong HUANG","raw_affiliation_strings":["Institute of Data Science, National University of Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Data Science, National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129667762","display_name":"Jingwen Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jingwen Guo","raw_affiliation_strings":["King's College London"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"King's College London","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129688016","display_name":"Zeyu Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Sun","raw_affiliation_strings":["Institute of Software Chinese Academy of Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Software Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129696013","display_name":"Qihao Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qihao Zhu","raw_affiliation_strings":["Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129733679","display_name":"Jie M. Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jie M. Zhang","raw_affiliation_strings":["King's College London"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"King's College London","institution_ids":["https://openalex.org/I183935753"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07920792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"38","first_page":"32628","last_page":"32636"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.14980000257492065,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.14980000257492065,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.14730000495910645,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13650","display_name":"Computational Physics and Python Applications","score":0.1404000073671341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.7233999967575073},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5406000018119812},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4722999930381775},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.44209998846054077},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3991999924182892},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.3962000012397766},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.3894999921321869},{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.36959999799728394},{"id":"https://openalex.org/keywords/unit-testing","display_name":"Unit testing","score":0.3571999967098236}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7922999858856201},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.7233999967575073},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5406000018119812},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4722999930381775},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3991999924182892},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.3962000012397766},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.3894999921321869},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.38920000195503235},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36980000138282776},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.36480000615119934},{"id":"https://openalex.org/C148027188","wikidata":"https://www.wikidata.org/wiki/Q907375","display_name":"Unit testing","level":3,"score":0.3571999967098236},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3483000099658966},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.34119999408721924},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.34040001034736633},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.3203999996185303},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3131999969482422},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3098999857902527},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28870001435279846},{"id":"https://openalex.org/C128942645","wikidata":"https://www.wikidata.org/wiki/Q1568346","display_name":"Test case","level":3,"score":0.26899999380111694},{"id":"https://openalex.org/C2994469624","wikidata":"https://www.wikidata.org/wiki/Q101965","display_name":"Experimental science","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26840001344680786},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1609/aaai.v40i38.40540","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40540","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40540/44501","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:ojs.aaai.org:article/40540","is_oa":false,"landing_page_url":"https://ojs.aaai.org/index.php/AAAI/article/view/40540","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2159-5399","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i38.40540","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40540","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/40540/44501","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4269887845","display_name":null,"funder_award_id":"EP/S023356/1","funder_id":"https://openalex.org/F4320314731","funder_display_name":"UK Research and Innovation"},{"id":"https://openalex.org/G8355433955","display_name":null,"funder_award_id":"62402482","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320313144","display_name":"ITEA","ror":null},{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138015695.pdf","grobid_xml":"https://content.openalex.org/works/W7138015695.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,162],"introduce":[1],"DSCodeBench,":[2,73],"a":[3,45,76,141,150,168],"new":[4],"benchmark":[5],"designed":[6],"to":[7,101,130,153],"evaluate":[8],"large":[9,151],"language":[10],"models":[11,122],"(LLMs)":[12],"on":[13],"complicated":[14],"and":[15,48,61,66,89,104,170],"realistic":[16,31,156],"data":[17,40,57,157,176],"science":[18,41,58,158,177],"code":[19,53,84,159],"generation":[20,160],"tasks.":[21,161],"DSCodeBench":[22,43,115,164],"consists":[23],"of":[24,108,143],"1,000":[25],"carefully":[26],"constructed":[27],"problems":[28,32],"sourced":[29],"from":[30,33],"GitHub":[34],"across":[35],"ten":[36],"widely":[37],"used":[38],"Python":[39],"libraries.":[42],"offers":[44],"more":[46,51,55],"challenging":[47],"representative":[49],"testbed,":[50],"complex":[52],"solutions,":[54],"comprehensive":[56],"libraries,":[59],"clearer":[60],"better":[62],"structured":[63],"problem":[64,90],"descriptions,":[65],"stronger":[67],"test":[68,86],"suites.":[69],"To":[70],"construct":[71],"the":[72,106,109],"we":[74,137],"develop":[75],"robust":[77,117],"pipeline":[78],"that":[79,114,146],"combines":[80],"task":[81],"scope":[82],"selection,":[83],"construction,":[85],"case":[87],"generation,":[88],"description":[91],"synthesis.":[92],"The":[93,134],"process":[94],"is":[95],"paired":[96],"with":[97],"rigorous":[98,169],"manual":[99],"editing":[100],"ensure":[102],"alignment":[103],"enhance":[105],"reliability":[107],"evaluation.":[110],"Experimental":[111],"result":[112],"shows":[113],"exhibits":[116],"scaling":[118],"behavior,":[119],"where":[120],"larger":[121],"systematically":[123],"outperform":[124],"smaller":[125],"ones,":[126],"validating":[127],"its":[128],"ability":[129],"distinguish":[131],"model":[132],"capabilities.":[133],"best":[135],"LLM":[136],"test,":[138],"GPT-4o,":[139],"has":[140],"pass@1":[142],"0.392,":[144],"indicating":[145],"LLMs":[147],"still":[148],"have":[149],"room":[152],"improve":[154],"for":[155,173],"believe":[163],"will":[165],"serve":[166],"as":[167],"trustworthy":[171],"foundation":[172],"advancing":[174],"LLM-based":[175],"programming.":[178]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
