{"id":"https://openalex.org/W7139054217","doi":"https://doi.org/10.48550/arxiv.2603.17216","title":"AI Scientist via Synthetic Task Scaling","display_name":"AI Scientist via Synthetic Task Scaling","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7139054217","doi":"https://doi.org/10.48550/arxiv.2603.17216"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.17216","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17216","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.17216","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019302184","display_name":"Ziyang Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cai, Ziyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5081255348","display_name":"Harkirat Behl","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Behl, Harkirat","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5019302184"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.33160001039505005,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.33160001039505005,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.14030000567436218,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0722000002861023,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7163000106811523},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6233999729156494},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5608000159263611},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5508000254631042},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.4544000029563904},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.3758000135421753}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.722599983215332},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7163000106811523},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7160000205039978},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6697999835014343},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6233999729156494},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5608000159263611},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5508000254631042},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.4544000029563904},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3314000070095062},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C2780589192","wikidata":"https://www.wikidata.org/wiki/Q7285140","display_name":"Raising (metalworking)","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C2986563244","wikidata":"https://www.wikidata.org/wiki/Q6822310","display_name":"Learning to learn","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C2776937632","wikidata":"https://www.wikidata.org/wiki/Q4117718","display_name":"Program synthesis","level":2,"score":0.2605000138282776}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.17216","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17216","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.17216","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.17216","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,78,102,108,124,140,153,177],"advent":[2],"of":[3,126],"AI":[4],"agents,":[5],"automatic":[6],"scientific":[7],"discovery":[8],"has":[9],"become":[10],"a":[11,29,59,119,133,147,157],"tenable":[12],"goal.":[13],"Many":[14],"recent":[15],"works":[16],"scaffold":[17],"agentic":[18],"systems":[19],"that":[20,52],"can":[21,53],"perform":[22],"machine":[23,66,73,98,136],"learning":[24,67,74,99,137],"research,":[25],"but":[26,43],"don't":[27],"offer":[28],"principled":[30],"way":[31],"to":[32,155],"train":[33,156],"such":[34],"agents":[35,51],"--":[36],"and":[37,86,111,161,184],"current":[38],"LLMs":[39],"often":[40],"generate":[41],"plausible-looking":[42],"ineffective":[44],"ideas.":[45],"To":[46,122],"make":[47],"progress":[48],"on":[49,174],"training":[50],"learn":[54],"from":[55,146],"doing,":[56],"we":[57,130,143],"provide":[58],"novel":[60],"synthetic":[61,91,128,141,169],"environment":[62],"generation":[63],"pipeline":[64,70],"targeting":[65],"agents.":[68],"Our":[69],"automatically":[71],"synthesizes":[72],"challenges":[75],"compatible":[76],"with":[77,118,167],"SWE-agent":[79],"framework,":[80],"covering":[81],"topic":[82],"sampling,":[83],"dataset":[84],"proposal,":[85],"code":[87],"generation.":[88],"The":[89,163],"resulting":[90],"tasks":[92,170],"are":[93,105,112],"1)":[94],"grounded":[95],"in":[96],"real":[97],"datasets,":[100],"because":[101],"proposed":[103],"datasets":[104],"verified":[106,114],"against":[107],"Huggingface":[109],"API":[110],"2)":[113],"for":[115,135,182,186],"higher":[116],"quality":[117],"self-debugging":[120],"loop.":[121],"validate":[123],"effectiveness":[125],"our":[127,168],"tasks,":[129,142],"tackle":[131],"MLGym,":[132,175],"benchmark":[134],"tasks.":[138],"From":[139],"sample":[144],"trajectories":[145,154],"teacher":[148],"model":[149,159],"(GPT-5),":[150],"then":[151],"use":[152],"student":[158,164],"(Qwen3-4B":[160],"Qwen3-8B).":[162],"models":[165],"trained":[166],"achieve":[171],"improved":[172],"performance":[173],"raising":[176],"AUP":[178],"metric":[179],"by":[180],"9%":[181],"Qwen3-4B":[183],"12%":[185],"Qwen3-8B.":[187]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
