{"id":"https://openalex.org/W4403345800","doi":"https://doi.org/10.48550/arxiv.2410.07095","title":"MLE-bench: Evaluating Machine Learning Agents on Machine Learning Engineering","display_name":"MLE-bench: Evaluating Machine Learning Agents on Machine Learning Engineering","publication_year":2024,"publication_date":"2024-10-09","ids":{"openalex":"https://openalex.org/W4403345800","doi":"https://doi.org/10.48550/arxiv.2410.07095"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.07095","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.07095","pdf_url":"https://arxiv.org/pdf/2410.07095","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.07095","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047504721","display_name":"Jun Shern Chan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chan, Jun Shern","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053186663","display_name":"Neil Chowdhury","orcid":"https://orcid.org/0000-0002-0590-993X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chowdhury, Neil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109819213","display_name":"Oliver Jaffe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jaffe, Oliver","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109819214","display_name":"James Aung","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aung, James","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098316662","display_name":"Dane Sherburn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sherburn, Dane","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114245168","display_name":"Evan Mays","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mays, Evan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014595992","display_name":"Giulio Starace","orcid":"https://orcid.org/0000-0001-5284-4238"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Starace, Giulio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100658205","display_name":"Kevin Liu","orcid":"https://orcid.org/0000-0001-7688-5819"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Kevin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109819215","display_name":"Leon Maksin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maksin, Leon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109819212","display_name":"Tejal Patwardhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patwardhan, Tejal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103810699","display_name":"Lilian Weng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weng, Lilian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032574693","display_name":"Aleksander M\u0105dry","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M\u0105dry, Aleksander","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5047504721"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.758400022983551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.758400022983551,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5386483669281006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4350692331790924},{"id":"https://openalex.org/keywords/bench-to-bedside","display_name":"Bench to bedside","score":0.42000266909599304},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3720247149467468},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.05809339880943298},{"id":"https://openalex.org/keywords/medical-physics","display_name":"Medical physics","score":0.035960763692855835}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5386483669281006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4350692331790924},{"id":"https://openalex.org/C3020000666","wikidata":"https://www.wikidata.org/wiki/Q17112037","display_name":"Bench to bedside","level":2,"score":0.42000266909599304},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3720247149467468},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.05809339880943298},{"id":"https://openalex.org/C19527891","wikidata":"https://www.wikidata.org/wiki/Q1120908","display_name":"Medical physics","level":1,"score":0.035960763692855835}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.07095","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.07095","pdf_url":"https://arxiv.org/pdf/2410.07095","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.07095","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.07095","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.07095","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.07095","pdf_url":"https://arxiv.org/pdf/2410.07095","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4403345800.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"We":[0,49,61,120],"introduce":[1],"MLE-bench,":[2],"a":[3,28,89],"benchmark":[4,123],"for":[5,53,110],"measuring":[6],"how":[7],"well":[8],"AI":[9,111,137],"agents":[10,112],"perform":[11],"at":[12,84],"machine":[13],"learning":[14],"engineering.":[15],"To":[16],"this":[17],"end,":[18],"we":[19,103],"curate":[20],"75":[21],"ML":[22,37,133],"engineering-related":[23],"competitions":[24],"from":[25,118],"Kaggle,":[26],"creating":[27],"diverse":[29],"set":[30],"of":[31,88,95,107,116,136],"challenging":[32],"tasks":[33],"that":[34,76],"test":[35],"real-world":[36],"engineering":[38,134],"skills":[39],"such":[40],"as":[41],"training":[42],"models,":[43],"preparing":[44],"datasets,":[45],"and":[46,113],"running":[47],"experiments.":[48],"establish":[50],"human":[51],"baselines":[52],"each":[54],"competition":[55],"using":[56],"Kaggle's":[57],"publicly":[58],"available":[59],"leaderboards.":[60],"use":[62],"open-source":[63,121],"agent":[64],"scaffolds":[65],"to":[66,99,126],"evaluate":[67],"several":[68],"frontier":[69],"language":[70],"models":[71],"on":[72],"our":[73,100,122],"benchmark,":[74],"finding":[75],"the":[77,86,114,132],"best-performing":[78],"setup--OpenAI's":[79],"o1-preview":[80],"with":[81],"AIDE":[82],"scaffolding--achieves":[83],"least":[85],"level":[87],"Kaggle":[90],"bronze":[91],"medal":[92],"in":[93,130],"16.9%":[94],"competitions.":[96],"In":[97],"addition":[98],"main":[101],"results,":[102],"investigate":[104],"various":[105],"forms":[106],"resource":[108],"scaling":[109],"impact":[115],"contamination":[117],"pre-training.":[119],"code":[124],"(github.com/openai/mle-bench/)":[125],"facilitate":[127],"future":[128],"research":[129],"understanding":[131],"capabilities":[135],"agents.":[138]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
