{"id":"https://openalex.org/W4385562468","doi":"https://doi.org/10.1145/3580305.3599518","title":"Test Accuracy vs. Generalization Gap: Model Selection in NLP without Accessing Training or Testing Data","display_name":"Test Accuracy vs. Generalization Gap: Model Selection in NLP without Accessing Training or Testing Data","publication_year":2023,"publication_date":"2023-08-04","ids":{"openalex":"https://openalex.org/W4385562468","doi":"https://doi.org/10.1145/3580305.3599518"},"language":"en","primary_location":{"id":"doi:10.1145/3580305.3599518","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599518","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599518","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599518","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020994183","display_name":"Yaoqing Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I107672454","display_name":"Dartmouth College","ror":"https://ror.org/049s0rh22","country_code":"US","type":"education","lineage":["https://openalex.org/I107672454"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yaoqing Yang","raw_affiliation_strings":["Dartmouth College, Hanover, NH, USA"],"affiliations":[{"raw_affiliation_string":"Dartmouth College, Hanover, NH, USA","institution_ids":["https://openalex.org/I107672454"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063289776","display_name":"Ryan Theisen","orcid":"https://orcid.org/0009-0005-7542-5921"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Theisen","raw_affiliation_strings":["University of California Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030249020","display_name":"Liam Hodgkinson","orcid":"https://orcid.org/0000-0002-4595-0347"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"The University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Liam Hodgkinson","raw_affiliation_strings":["University of Melbourne, Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"University of Melbourne, Melbourne, Australia","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072427753","display_name":"Joseph E. Gonzalez","orcid":"https://orcid.org/0000-0003-2921-956X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joseph E. Gonzalez","raw_affiliation_strings":["University of California, Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030620564","display_name":"Kannan Ramchandran","orcid":"https://orcid.org/0000-0002-4567-328X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kannan Ramchandran","raw_affiliation_strings":["University of California Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000385819","display_name":"Charles H. Martin","orcid":"https://orcid.org/0009-0004-5320-9972"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Charles H. Martin","raw_affiliation_strings":["Calculation Consulting, San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Calculation Consulting, San Francisco, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033006662","display_name":"Michael W. Mahoney","orcid":"https://orcid.org/0000-0001-7920-4652"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael W. Mahoney","raw_affiliation_strings":["University of California Berkeley, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5020994183"],"corresponding_institution_ids":["https://openalex.org/I107672454"],"apc_list":null,"apc_paid":null,"fwci":1.9011,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.88559651,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3011","last_page":"3021"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.7888278961181641},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7616384029388428},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7447128891944885},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.7366880178451538},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5412741303443909},{"id":"https://openalex.org/keywords/model-selection","display_name":"Model selection","score":0.5018386840820312},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.4911153018474579},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.48531240224838257},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.45986342430114746},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.43523284792900085},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.42227429151535034},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15956753492355347}],"concepts":[{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.7888278961181641},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7616384029388428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7447128891944885},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7366880178451538},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5412741303443909},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.5018386840820312},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.4911153018474579},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.48531240224838257},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.45986342430114746},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.43523284792900085},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.42227429151535034},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15956753492355347},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3580305.3599518","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599518","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599518","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3580305.3599518","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3580305.3599518","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3580305.3599518","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5699999928474426,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1171700966","display_name":null,"funder_award_id":"NSF CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2273398380","display_name":null,"funder_award_id":"NSF CAREER","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G2775513529","display_name":null,"funder_award_id":"1730628","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3011529254","display_name":"CIF: Small: Foundations of Serverless Computing: Optimizing Latency and Utility","funder_award_id":"2007669","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G575632154","display_name":null,"funder_award_id":"CISE Expeditions Award CCF-1730628","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5987536167","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320332169","funder_display_name":"Directorate for Computer and Information Science and Engineering"},{"id":"https://openalex.org/G6371041305","display_name":"EAGER: SaTC: CORE: Small: Blockchain Architectures for Resource-Constrained Devices","funder_award_id":"1937357","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6404126523","display_name":null,"funder_award_id":"CCF-173062","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7452299184","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G7623588762","display_name":null,"funder_award_id":"NSF CISE Expeditions Award CCF-1730628","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8998121839","display_name":null,"funder_award_id":"911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320310598","display_name":"Amazon Web Services","ror":"https://ror.org/04mv4n011"},{"id":"https://openalex.org/F4320316785","display_name":"VMware","ror":null},{"id":"https://openalex.org/F4320316831","display_name":"Futurewei Technologies","ror":null},{"id":"https://openalex.org/F4320318398","display_name":"Ant Group","ror":null},{"id":"https://openalex.org/F4320332169","display_name":"Directorate for Computer and Information Science and Engineering","ror":"https://ror.org/025kzpk63"},{"id":"https://openalex.org/F4320333051","display_name":"Intelligence Advanced Research Projects Activity","ror":"https://ror.org/01v3fsc55"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385562468.pdf","grobid_xml":"https://content.openalex.org/works/W4385562468.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1593271688","https://openalex.org/W2029029543","https://openalex.org/W2088856850","https://openalex.org/W2251994258","https://openalex.org/W2709553318","https://openalex.org/W2732724430","https://openalex.org/W2889326796","https://openalex.org/W2895616758","https://openalex.org/W2912497910","https://openalex.org/W2963664410","https://openalex.org/W2963807318","https://openalex.org/W2963846996","https://openalex.org/W2981852735","https://openalex.org/W2982041622","https://openalex.org/W2991044292","https://openalex.org/W3006197067","https://openalex.org/W3176456866","https://openalex.org/W3178985214","https://openalex.org/W3191734561","https://openalex.org/W4212774754","https://openalex.org/W4234465017","https://openalex.org/W4288089799","https://openalex.org/W6740483536"],"related_works":["https://openalex.org/W2140186469","https://openalex.org/W4390421286","https://openalex.org/W4280563792","https://openalex.org/W4389724018","https://openalex.org/W4318719684","https://openalex.org/W3183136280","https://openalex.org/W4318559728","https://openalex.org/W2775233965","https://openalex.org/W4360995913","https://openalex.org/W4312193868"],"abstract_inverted_index":{"Selecting":[0],"suitable":[1],"architecture":[2],"parameters":[3],"and":[4,158,180,187,238],"training":[5],"hyperparameters":[6],"is":[7],"essential":[8],"for":[9,30],"enhancing":[10],"machine":[11],"learning":[12],"(ML)":[13],"model":[14,40,64,123,156],"performance.":[15,52],"Several":[16],"recent":[17],"empirical":[18],"studies":[19],"conduct":[20],"large-scale":[21],"correlational":[22],"analysis":[23],"on":[24,58,72,83,126,229],"neural":[25],"networks":[26],"(NNs)":[27],"to":[28,47,109,111,119,235],"search":[29],"effective":[31],"generalization":[32,99,133,189],"metrics":[33,43,90,103,198],"that":[34,91,104,197],"can":[35],"guide":[36],"this":[37,54],"type":[38],"of":[39,97,140,153,165,172,184],"selection.":[41],"Effective":[42],"are":[44,117,205],"typically":[45],"expected":[46],"correlate":[48],"strongly":[49],"with":[50,66],"test":[51,94],"In":[53],"paper,":[55],"we":[56,116,148,195,224],"expand":[57],"prior":[59,79,226],"analyses":[60,136],"by":[61],"examining":[62],"generalization-metric-based":[63],"selection":[65,124],"the":[67,98,121,151,155,159,201],"following":[68],"objectives:":[69],"(i)":[70],"focusing":[71],"natural":[73],"language":[74],"processing":[75],"(NLP)":[76],"tasks,":[77,210],"as":[78],"work":[80],"primarily":[81],"concentrates":[82],"computer":[84],"vision":[85],"(CV)":[86],"tasks;":[87],"(ii)":[88],"considering":[89],"directly":[92],"predict":[93],"error":[95],"instead":[96],"gap;":[100],"(iii)":[101],"exploring":[102],"do":[105],"not":[106],"need":[107],"access":[108],"data":[110],"compute.":[112],"From":[113],"these":[114,222],"objectives,":[115],"able":[118],"provide":[120],"first":[122],"results":[125],"large":[127],"pretrained":[128,167],"Transformers":[129,141,168],"from":[130,169,200],"Huggingface":[131,173],"using":[132],"metrics.":[134,190,218],"Our":[135],"consider":[137],"(I)":[138],"hundreds":[139],"trained":[142],"in":[143,146,208],"different":[144],"settings,":[145],"which":[147],"systematically":[149],"vary":[150],"amount":[152],"data,":[154],"size":[157],"optimization":[160],"hyperparameters,":[161],"(II)":[162],"a":[163,182],"total":[164,183],"51":[166],"eight":[170],"families":[171],"NLP":[174,209],"models,":[175],"including":[176],"GPT2,":[177],"BERT,":[178],"etc.,":[179],"(III)":[181],"28":[185],"existing":[186],"novel":[188],"Despite":[191],"their":[192],"niche":[193],"status,":[194],"find":[196],"derived":[199],"heavy-tail":[202],"(HT)":[203],"perspective":[204],"particularly":[206],"useful":[207],"exhibiting":[211],"stronger":[212],"correlations":[213],"than":[214],"other,":[215],"more":[216],"popular":[217],"To":[219],"further":[220],"examine":[221],"metrics,":[223],"extend":[225],"formulations":[227],"relying":[228],"power":[230,240],"law":[231,241],"(PL)":[232],"spectral":[233],"distributions":[234],"exponential":[236],"(EXP)":[237],"exponentially-truncated":[239],"(E-TPL)":[242],"families.":[243]},"counts_by_year":[{"year":2025,"cited_by_count":11}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
