{"id":"https://openalex.org/W2966464106","doi":"https://doi.org/10.1145/3676278","title":"How Much Data Is Sufficient to Learn High-Performing Algorithms?","display_name":"How Much Data Is Sufficient to Learn High-Performing Algorithms?","publication_year":2024,"publication_date":"2024-07-29","ids":{"openalex":"https://openalex.org/W2966464106","doi":"https://doi.org/10.1145/3676278","mag":"2966464106"},"language":"en","primary_location":{"id":"doi:10.1145/3676278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3676278","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3676278","source":{"id":"https://openalex.org/S118992489","display_name":"Journal of the ACM","issn_l":"0004-5411","issn":["0004-5411","1557-735X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the ACM","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3676278","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068544954","display_name":"Maria-Florina Balcan","orcid":"https://orcid.org/0000-0002-9525-0103"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maria-Florina Balcan","raw_affiliation_strings":["Computer Science, Carnegie Mellon University, Pittsburgh, United States"],"raw_orcid":"https://orcid.org/0000-0002-9525-0103","affiliations":[{"raw_affiliation_string":"Computer Science, Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014004628","display_name":"Dan DeBlasio","orcid":"https://orcid.org/0000-0003-4110-4431"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan Deblasio","raw_affiliation_strings":["Computational biology, Carnegie Mellon University, Pittsburgh, United States"],"raw_orcid":"https://orcid.org/0000-0003-4110-4431","affiliations":[{"raw_affiliation_string":"Computational biology, Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090992760","display_name":"Travis Dick","orcid":"https://orcid.org/0009-0005-1271-307X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Travis Dick","raw_affiliation_strings":["Google Inc New York, New York, United States"],"raw_orcid":"https://orcid.org/0009-0005-1271-307X","affiliations":[{"raw_affiliation_string":"Google Inc New York, New York, United States","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113653378","display_name":"Carl Kingsford","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carl Kingsford","raw_affiliation_strings":["Computational biology, Carnegie Mellon University, Pittsburgh, United States"],"raw_orcid":"https://orcid.org/0000-0002-0118-5516","affiliations":[{"raw_affiliation_string":"Computational biology, Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023571961","display_name":"T\u00fcomas Sandholm","orcid":"https://orcid.org/0000-0001-8861-9366"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tuomas Sandholm","raw_affiliation_strings":["Computer science, Carnegie Mellon University, Pittsburgh, United States"],"raw_orcid":"https://orcid.org/0000-0001-8861-9366","affiliations":[{"raw_affiliation_string":"Computer science, Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050640377","display_name":"Ellen Vitercik","orcid":"https://orcid.org/0000-0003-4891-1367"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ellen Vitercik","raw_affiliation_strings":["Management Science &amp; Engineering; Computer science, Stanford University, Stanford, United States","Management Science & Engineering"],"raw_orcid":"https://orcid.org/0000-0003-4891-1367","affiliations":[{"raw_affiliation_string":"Management Science &amp; Engineering; Computer science, Stanford University, Stanford, United States","institution_ids":["https://openalex.org/I97018004"]},{"raw_affiliation_string":"Management Science & Engineering","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3055,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.5899611,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"71","issue":"5","first_page":"1","last_page":"58"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.676308274269104},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.48812660574913025}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.676308274269104},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.48812660574913025}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3676278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3676278","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3676278","source":{"id":"https://openalex.org/S118992489","display_name":"Journal of the ACM","issn_l":"0004-5411","issn":["0004-5411","1557-735X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the ACM","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3676278","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3676278","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3676278","source":{"id":"https://openalex.org/S118992489","display_name":"Journal of the ACM","issn_l":"0004-5411","issn":["0004-5411","1557-735X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of the ACM","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3023850491","display_name":null,"funder_award_id":"R01GM122935","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G5357036168","display_name":null,"funder_award_id":"Graduate Research Fellowship and CCF-2338226 to E.V. and grants IIS-1901403 to M.B. and T.S., IIS-1618714, CCF-1535967, CCF-1910321, and SES-1919453 to M.B., RI-2312342, IIS-1718457, IIS-1617590, and CCF-1733556 to T.S., and DBI-1937540 to C.K.","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G571024940","display_name":null,"funder_award_id":"W911NF2210266, W911NF-17-1-0082 and W911NF2010081 to T.S.","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2966464106.pdf"},"referenced_works_count":114,"referenced_works":["https://openalex.org/W60686164","https://openalex.org/W607505555","https://openalex.org/W1492443707","https://openalex.org/W1542886316","https://openalex.org/W1610269435","https://openalex.org/W1746999111","https://openalex.org/W1799018966","https://openalex.org/W1844580627","https://openalex.org/W1969153299","https://openalex.org/W1971361630","https://openalex.org/W1974520228","https://openalex.org/W1980472426","https://openalex.org/W1984036412","https://openalex.org/W1985123706","https://openalex.org/W1990561950","https://openalex.org/W2000191287","https://openalex.org/W2002638840","https://openalex.org/W2010029425","https://openalex.org/W2014328192","https://openalex.org/W2015007620","https://openalex.org/W2018885841","https://openalex.org/W2024412145","https://openalex.org/W2026187504","https://openalex.org/W2029050771","https://openalex.org/W2029538739","https://openalex.org/W2055043387","https://openalex.org/W2068448872","https://openalex.org/W2068628468","https://openalex.org/W2074231493","https://openalex.org/W2078040677","https://openalex.org/W2086438703","https://openalex.org/W2086886584","https://openalex.org/W2089358540","https://openalex.org/W2090037139","https://openalex.org/W2091364465","https://openalex.org/W2094031081","https://openalex.org/W2108215151","https://openalex.org/W2109949710","https://openalex.org/W2110241016","https://openalex.org/W2111175290","https://openalex.org/W2114320077","https://openalex.org/W2117354486","https://openalex.org/W2120100612","https://openalex.org/W2129076851","https://openalex.org/W2135326109","https://openalex.org/W2135682453","https://openalex.org/W2140890018","https://openalex.org/W2143738202","https://openalex.org/W2147148915","https://openalex.org/W2149758239","https://openalex.org/W2157720863","https://openalex.org/W2159654277","https://openalex.org/W2164143329","https://openalex.org/W2267070684","https://openalex.org/W2273828870","https://openalex.org/W2330274477","https://openalex.org/W2331952463","https://openalex.org/W2441441494","https://openalex.org/W2502882948","https://openalex.org/W2508573783","https://openalex.org/W2536379393","https://openalex.org/W2575112216","https://openalex.org/W2592973894","https://openalex.org/W2602753196","https://openalex.org/W2607437843","https://openalex.org/W2612629517","https://openalex.org/W2619957053","https://openalex.org/W2620217443","https://openalex.org/W2624823912","https://openalex.org/W2625599877","https://openalex.org/W2799013664","https://openalex.org/W2891784792","https://openalex.org/W2904961209","https://openalex.org/W2909813108","https://openalex.org/W2913189977","https://openalex.org/W2914501678","https://openalex.org/W2946765615","https://openalex.org/W2950680102","https://openalex.org/W2958191092","https://openalex.org/W2981871664","https://openalex.org/W2996613330","https://openalex.org/W2997029305","https://openalex.org/W3004718980","https://openalex.org/W3034640570","https://openalex.org/W3034808617","https://openalex.org/W3089386712","https://openalex.org/W3090460193","https://openalex.org/W3100022481","https://openalex.org/W3100248546","https://openalex.org/W3101626944","https://openalex.org/W3107171610","https://openalex.org/W3123311998","https://openalex.org/W3128171061","https://openalex.org/W3128903755","https://openalex.org/W3139046134","https://openalex.org/W3164335878","https://openalex.org/W3189470799","https://openalex.org/W4236362309","https://openalex.org/W4238284510","https://openalex.org/W4240374930","https://openalex.org/W4288581668","https://openalex.org/W4292025355","https://openalex.org/W4293713292","https://openalex.org/W4295700147","https://openalex.org/W4297817121","https://openalex.org/W4298036499","https://openalex.org/W4299591666","https://openalex.org/W4300568064","https://openalex.org/W4301352775","https://openalex.org/W6629542823","https://openalex.org/W6681933737","https://openalex.org/W6739088948","https://openalex.org/W6739115598","https://openalex.org/W6749569130"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109"],"abstract_inverted_index":{"Algorithms":[0],"often":[1],"have":[2],"tunable":[3],"parameters":[4,30,40,150,181],"that":[5,73,122,164,227],"impact":[6],"performance":[7,110,130,138,170,232],"metrics":[8],"such":[9],"as":[10],"runtime":[11,54],"and":[12,102,135,216],"solution":[13],"quality.":[14],"For":[15],"many":[16,166],"algorithms":[17,265],"used":[18],"in":[19,64,85,187,253],"practice,":[20],"no":[21,146],"parameter":[22,78,105],"settings":[23],"admit":[24],"meaningful":[25],"worst-case":[26,50],"bounds,":[27],"so":[28],"the":[29,35,46,112,124,127,132,140,149,176,180,254],"are":[31,151,247],"made":[32],"available":[33],"for":[34,118,165,261],"user":[36],"to":[37,77,82,250],"tune.":[38],"Alternatively,":[39],"may":[41,59],"be":[42,60,153],"tuned":[43],"implicitly":[44],"within":[45],"proof":[47],"of":[48,69,93,168,175,208,241],"a":[49,74,90,104,172,184,224,234],"approximation":[51],"ratio":[52],"or":[53,62,158],"bound.":[55],"Worst-case":[56],"instances,":[57],"however,":[58],"rare":[61],"nonexistent":[63],"practice.":[65],"A":[66],"growing":[67],"body":[68],"research":[70,190],"has":[71,200],"demonstrated":[72],"data-driven":[75],"approach":[76,88],"tuning":[79],"can":[80,182],"lead":[81],"significant":[83],"improvements":[84],"performance.":[86],"This":[87],"uses":[89],"training":[91,113,133],"set":[92,134],"problem":[94],"instances":[95],"sampled":[96],"from":[97,266],"an":[98,156,230],"unknown,":[99],"application-specific":[100],"distribution":[101],"returns":[103],"setting":[106],"with":[107,223],"strong":[108],"average":[109,129],"on":[111,139],"set.":[114],"We":[115,219],"provide":[116],"techniques":[117],"deriving":[119],"generalization":[120,202],"guarantees":[121],"bound":[123],"difference":[125],"between":[126],"algorithm\u2019s":[128,231],"over":[131],"its":[136,242],"expected":[137],"unknown":[141],"distribution.":[142],"Our":[143,244],"results":[144],"apply":[145],"matter":[147],"how":[148],"tuned,":[152],"it":[154],"via":[155],"automated":[157],"manual":[159],"approach.":[160],"The":[161],"challenge":[162],"is":[163,171,233],"types":[167],"algorithms,":[169,210,212,215],"volatile":[173],"function":[174,240],"parameters:":[177],"slightly":[178],"perturbing":[179],"cause":[183],"large":[185],"change":[186],"behavior.":[188],"Prior":[189],"[e.g.,":[191],"12":[192],",":[193,195,197],"16":[194],"20":[196],"62":[198],"]":[199],"proved":[201],"bounds":[203,260],"by":[204],"employing":[205],"case-by-case":[206],"analyses":[207,222],"greedy":[209],"clustering":[211],"integer":[213],"programming":[214,264],"selling":[217],"mechanisms.":[218],"streamline":[220],"these":[221],"general":[225],"theorem":[226],"applies":[228],"whenever":[229],"piecewise-constant,":[235],"piecewise-linear,":[236],"or\u2014more":[237],"generally\u2014":[238],"piecewise-structured":[239],"parameters.":[243],"results,":[245],"which":[246],"tight":[248],"up":[249],"logarithmic":[251],"factors":[252],"worst":[255],"case,":[256],"also":[257],"imply":[258],"novel":[259],"configuring":[262],"dynamic":[263],"computational":[267],"biology.":[268]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
