{"id":"https://openalex.org/W7101829935","doi":"https://doi.org/10.48550/arxiv.2510.23166","title":"Common Task Framework For a Critical Evaluation of Scientific Machine Learning Algorithms","display_name":"Common Task Framework For a Critical Evaluation of Scientific Machine Learning Algorithms","publication_year":2025,"publication_date":"2025-10-27","ids":{"openalex":"https://openalex.org/W7101829935","doi":"https://doi.org/10.48550/arxiv.2510.23166"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2510.23166","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.23166","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2510.23166","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Wyder, Philippe Martin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wyder, Philippe Martin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Goldfeder, Judah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goldfeder, Judah","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yermakov, Alexey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yermakov, Alexey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zhao, Yue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Riva, Stefano","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Riva, Stefano","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Williams, Jan P.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Williams, Jan P.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Zoro, David","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zoro, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Rude, Amy Sara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rude, Amy Sara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Tomasetto, Matteo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tomasetto, Matteo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Germany, Joe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Germany, Joe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bakarji, Joseph","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bakarji, Joseph","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Maierhofer, Georg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maierhofer, Georg","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cranmer, Miles","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cranmer, Miles","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Kutz, J. Nathan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kutz, J. Nathan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2628999948501587,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2628999948501587,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.1062999963760376,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.07590000331401825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5690000057220459},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5584999918937683},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5092999935150146},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4291999936103821},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.383899986743927},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.35989999771118164},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.33329999446868896},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.30250000953674316}],"concepts":[{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.8019000291824341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7409999966621399},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7099000215530396},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5690000057220459},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5584999918937683},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5092999935150146},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4291999936103821},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.383899986743927},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.35989999771118164},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2766999900341034},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2612999975681305},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2510.23166","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.23166","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2510.23166","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2510.23166","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Machine":[0],"learning":[1],"(ML)":[2],"is":[3,180],"transforming":[4],"modeling":[5],"and":[6,12,33,44,70,77,84,99,128,143,150,199],"control":[7],"in":[8,93,138,201],"the":[9,20,89,133,136,195],"physical,":[10],"engineering,":[11],"biological":[13],"sciences.":[14],"However,":[15],"rapid":[16],"development":[17],"has":[18],"outpaced":[19],"creation":[21],"of":[22,68,91,112,135,148],"standardized,":[23],"objective":[24],"benchmarks":[25],"-":[26],"leading":[27],"to":[28,173,181],"weak":[29],"baselines,":[30],"reporting":[31],"bias,":[32],"inconsistent":[34],"evaluations":[35,188],"across":[36],"methods.":[37],"This":[38],"undermines":[39],"reproducibility,":[40],"misguides":[41],"resource":[42],"allocation,":[43],"obscures":[45],"scientific":[46,59,202],"progress.":[47],"To":[48],"address":[49],"this,":[50],"we":[51,119,154],"propose":[52],"a":[53,65,105,116,157,160,169],"Common":[54],"Task":[55],"Framework":[56],"(CTF)":[57],"for":[58,109,145,197],"machine":[60],"learning.":[61],"The":[62],"CTF":[63,137],"features":[64],"curated":[66],"set":[67],"datasets":[69],"task-specific":[71],"metrics":[72],"spanning":[73],"forecasting,":[74],"state":[75],"reconstruction,":[76],"generalization":[78],"under":[79],"realistic":[80],"constraints,":[81],"including":[82],"noise":[83],"limited":[85],"data.":[86],"Inspired":[87],"by":[88],"success":[90],"CTFs":[92],"fields":[94],"like":[95],"natural":[96],"language":[97],"processing":[98],"computer":[100],"vision,":[101],"our":[102],"framework":[103],"provides":[104],"structured,":[106],"rigorous":[107],"foundation":[108],"head-to-head":[110],"evaluation":[111],"diverse":[113,151],"algorithms.":[114],"As":[115],"first":[117],"step,":[118],"benchmark":[120],"methods":[121],"on":[122,189],"two":[123],"canonical":[124],"nonlinear":[125],"systems:":[126],"Kuramoto-Sivashinsky":[127],"Lorenz.":[129],"These":[130],"results":[131],"illustrate":[132],"utility":[134],"revealing":[139],"method":[140],"strengths,":[141],"limitations,":[142],"suitability":[144],"specific":[146],"classes":[147],"problems":[149],"objectives.":[152],"Next,":[153],"are":[155],"launching":[156],"competition":[158],"around":[159],"global":[161],"real":[162],"world":[163],"sea":[164],"surface":[165],"temperature":[166],"dataset":[167,172],"with":[168,186],"true":[170],"holdout":[171],"foster":[174],"community":[175],"engagement.":[176],"Our":[177],"long-term":[178],"vision":[179],"replace":[182],"ad":[183],"hoc":[184],"comparisons":[185],"standardized":[187],"hidden":[190],"test":[191],"sets":[192],"that":[193],"raise":[194],"bar":[196],"rigor":[198],"reproducibility":[200],"ML.":[203]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-29T00:00:00"}
