{"id":"https://openalex.org/W4416322339","doi":"https://doi.org/10.3390/make7040148","title":"Model-Aware Automatic Benchmark Generation with Self-Error Instructions for Data-Driven Models","display_name":"Model-Aware Automatic Benchmark Generation with Self-Error Instructions for Data-Driven Models","publication_year":2025,"publication_date":"2025-11-18","ids":{"openalex":"https://openalex.org/W4416322339","doi":"https://doi.org/10.3390/make7040148"},"language":"en","primary_location":{"id":"doi:10.3390/make7040148","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040148","pdf_url":"https://www.mdpi.com/2504-4990/7/4/148/pdf?version=1763474040","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/7/4/148/pdf?version=1763474040","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028962425","display_name":"Kirill Zakharov","orcid":"https://orcid.org/0000-0001-5774-4076"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Kirill Zakharov","raw_affiliation_strings":["Research Center \u201cStrong Artificial Intelligence in Industry\u201d, ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia","Research Center \"Strong Artificial Intelligence in Industry\", ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia"],"affiliations":[{"raw_affiliation_string":"Research Center \u201cStrong Artificial Intelligence in Industry\u201d, ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Research Center \"Strong Artificial Intelligence in Industry\", ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035754267","display_name":"Alexander V. Boukhanovsky","orcid":"https://orcid.org/0000-0003-1588-8164"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]},{"id":"https://openalex.org/I55027601","display_name":"Netherlands Institute for Advanced Study in the Humanities and Social Sciences","ror":"https://ror.org/04z3wz653","country_code":"NL","type":"facility","lineage":["https://openalex.org/I1322597698","https://openalex.org/I55027601"]}],"countries":["CA","NL"],"is_corresponding":false,"raw_author_name":"Alexander Boukhanovsky","raw_affiliation_strings":["Netherlands Institute of Advanced Studies, Korte Spinhuissteeg 3, 1012 CG Amsterdam, The Netherlands","Research Center \u201cStrong Artificial Intelligence in Industry\u201d, ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia","Research Center \"Strong Artificial Intelligence in Industry\", ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia"],"affiliations":[{"raw_affiliation_string":"Netherlands Institute of Advanced Studies, Korte Spinhuissteeg 3, 1012 CG Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I55027601"]},{"raw_affiliation_string":"Research Center \u201cStrong Artificial Intelligence in Industry\u201d, ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Research Center \"Strong Artificial Intelligence in Industry\", ITMO University, Birzhevaya Liniya 14, Saint Petersburg 199034, Russia","institution_ids":["https://openalex.org/I4210164862"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5028962425"],"corresponding_institution_ids":["https://openalex.org/I4210164862"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19147196,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"148","last_page":"148"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.6732000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.6732000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.05380000174045563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.04529999941587448,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.855400025844574},{"id":"https://openalex.org/keywords/data-point","display_name":"Data point","score":0.38909998536109924},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3889000117778778},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.3732999861240387},{"id":"https://openalex.org/keywords/data-driven","display_name":"Data-driven","score":0.32899999618530273},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.31450000405311584},{"id":"https://openalex.org/keywords/experimental-data","display_name":"Experimental data","score":0.2971000075340271}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.855400025844574},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7448999881744385},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6583999991416931},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5400999784469604},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4205000102519989},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.38909998536109924},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3889000117778778},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.31450000405311584},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C110332635","wikidata":"https://www.wikidata.org/wiki/Q629498","display_name":"Genetic programming","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.28529998660087585},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C8880873","wikidata":"https://www.wikidata.org/wiki/Q187787","display_name":"Genetic algorithm","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.2549000084400177}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/make7040148","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040148","pdf_url":"https://www.mdpi.com/2504-4990/7/4/148/pdf?version=1763474040","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:2e6b6032b9884780bdbdeac6bebea9c3","is_oa":true,"landing_page_url":"https://doaj.org/article/2e6b6032b9884780bdbdeac6bebea9c3","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 7, Iss 4, p 148 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make7040148","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040148","pdf_url":"https://www.mdpi.com/2504-4990/7/4/148/pdf?version=1763474040","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320324632","display_name":"Ministero dello Sviluppo Economico","ror":"https://ror.org/011z3ff80"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416322339.pdf"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W2010941383","https://openalex.org/W2167055186","https://openalex.org/W2760103357","https://openalex.org/W2966284335","https://openalex.org/W3005880794","https://openalex.org/W3177395556","https://openalex.org/W4220862496","https://openalex.org/W4391054870","https://openalex.org/W4391939497","https://openalex.org/W4403487680","https://openalex.org/W4403572618","https://openalex.org/W4412944840"],"related_works":[],"abstract_inverted_index":{"The":[0],"growing":[1],"number":[2,49],"of":[3,28,50,103,135],"domain-specific":[4],"machine":[5,121],"learning":[6,122],"benchmarks":[7],"has":[8],"driven":[9],"methodological":[10],"progress,":[11],"yet":[12],"real-world":[13],"deployments":[14],"require":[15],"a":[16,43,72,79,96,108],"different":[17],"evaluation":[18,134],"approach.":[19],"Model-aware":[20],"synthetic":[21],"benchmarks,":[22],"designed":[23],"to":[24,33,61,82,99,119],"emphasize":[25],"failure":[26],"modes":[27],"existing":[29],"models,":[30],"are":[31],"proposed":[32,71],"address":[34,67],"this":[35,68],"need.":[36],"However,":[37],"evaluating":[38],"already":[39],"well-performing":[40],"models":[41,89,137],"presents":[42],"significant":[44],"challenge,":[45],"as":[46],"the":[47,84,101,132],"limited":[48],"high-quality":[51],"data":[52,85],"points":[53,86],"where":[54,87],"they":[55],"exhibit":[56,90],"errors":[57],"makes":[58],"it":[59],"difficult":[60],"obtain":[62],"statistically":[63],"reliable":[64],"estimates.":[65],"To":[66],"gap,":[69],"we":[70],"two-step":[73],"benchmark":[74,113],"construction":[75],"process:":[76],"(i)":[77],"using":[78,95],"genetic":[80],"algorithm":[81],"augment":[83],"data-driven":[88,136],"poor":[91],"prediction":[92],"quality;":[93],"(ii)":[94],"generative":[97],"model":[98],"approximate":[100],"distribution":[102],"these":[104],"points.":[105],"We":[106],"established":[107],"general":[109],"formulation":[110],"for":[111,138],"such":[112],"construction,":[114],"which":[115],"can":[116],"be":[117],"adapted":[118],"non-classical":[120],"models.":[123],"Our":[124],"experimental":[125],"study":[126],"demonstrates":[127],"that":[128],"our":[129],"approach":[130],"enables":[131],"accurate":[133],"both":[139],"regression":[140],"and":[141],"classification":[142],"problems.":[143]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-11-18T00:00:00"}
