{"id":"https://openalex.org/W4284974526","doi":"https://doi.org/10.48550/arxiv.2207.02852","title":"Machine Learning Model Sizes and the Parameter Gap","display_name":"Machine Learning Model Sizes and the Parameter Gap","publication_year":2022,"publication_date":"2022-07-05","ids":{"openalex":"https://openalex.org/W4284974526","doi":"https://doi.org/10.48550/arxiv.2207.02852"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2207.02852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.02852","pdf_url":"https://arxiv.org/pdf/2207.02852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2207.02852","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103099856","display_name":"Pablo Villalobos","orcid":"https://orcid.org/0009-0004-3247-7994"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Villalobos, Pablo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006510039","display_name":"Jaime Sevilla","orcid":"https://orcid.org/0000-0002-4454-1146"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sevilla, Jaime","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009016780","display_name":"Tamay Besiroglu","orcid":"https://orcid.org/0000-0001-5808-3185"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Besiroglu, Tamay","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035147412","display_name":"Lennart Heim","orcid":"https://orcid.org/0000-0002-2593-266X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heim, Lennart","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103198508","display_name":"Anson Ho","orcid":"https://orcid.org/0000-0003-2597-7785"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ho, Anson","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5034914617","display_name":"Marius Hobbhahn","orcid":"https://orcid.org/0009-0003-8244-3154"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hobbhahn, Marius","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103099856"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9624999761581421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9624999761581421,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9466999769210815,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stylized-fact","display_name":"Stylized fact","score":0.7837186455726624},{"id":"https://openalex.org/keywords/magnitude","display_name":"Magnitude (astronomy)","score":0.729283332824707},{"id":"https://openalex.org/keywords/pace","display_name":"Pace","score":0.6342312097549438},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.541007936000824},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5143910050392151},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.484625905752182},{"id":"https://openalex.org/keywords/scarcity","display_name":"Scarcity","score":0.46179744601249695},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4270123839378357},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.33974549174308777},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3336876630783081},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21344685554504395},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.14220324158668518},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.11003619432449341},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08315518498420715}],"concepts":[{"id":"https://openalex.org/C38935604","wikidata":"https://www.wikidata.org/wiki/Q4330363","display_name":"Stylized fact","level":2,"score":0.7837186455726624},{"id":"https://openalex.org/C126691448","wikidata":"https://www.wikidata.org/wiki/Q2028919","display_name":"Magnitude (astronomy)","level":2,"score":0.729283332824707},{"id":"https://openalex.org/C2777526511","wikidata":"https://www.wikidata.org/wiki/Q691543","display_name":"Pace","level":2,"score":0.6342312097549438},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.541007936000824},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5143910050392151},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.484625905752182},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.46179744601249695},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4270123839378357},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.33974549174308777},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3336876630783081},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21344685554504395},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.14220324158668518},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.11003619432449341},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08315518498420715},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2207.02852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.02852","pdf_url":"https://arxiv.org/pdf/2207.02852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2207.02852","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2207.02852","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2207.02852","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.02852","pdf_url":"https://arxiv.org/pdf/2207.02852","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2529311304","https://openalex.org/W4248275646","https://openalex.org/W2992609826","https://openalex.org/W2552900035","https://openalex.org/W2162875951","https://openalex.org/W2062875858","https://openalex.org/W4380047323","https://openalex.org/W2138330538","https://openalex.org/W2046581487","https://openalex.org/W2350612079"],"abstract_inverted_index":{"We":[0,74,104,113],"study":[1],"trends":[2],"in":[3,23,47,99,188],"model":[4,21,38,137],"size":[5,22,39,138],"of":[6,31,45,66,68,97,158],"notable":[7],"machine":[8],"learning":[9],"systems":[10],"over":[11],"time":[12],"using":[13],"a":[14,59,95,124],"curated":[15],"dataset.":[16],"From":[17],"1950":[18,71],"to":[19,53,106,127,173],"2018,":[20],"language":[24,84,163],"models":[25,56,85,90,98,150,172],"increased":[26],"steadily":[27],"by":[28,41],"seven":[29],"orders":[30,44,65],"magnitude.":[32],"The":[33,130],"trend":[34],"then":[35],"accelerated,":[36],"with":[37,170],"increasing":[40,136],"another":[42],"five":[43],"magnitude":[46,67,159],"just":[48],"4":[49],"years":[50],"from":[51],"2018":[52],"2022.":[54,73],"Vision":[55],"grew":[57],"at":[58,205],"more":[60,202],"constant":[61],"pace,":[62],"totaling":[63],"7":[64],"growth":[69],"between":[70],"and":[72,122,165,181],"also":[75],"identify":[76],"that,":[77],"since":[78],"2020,":[79],"there":[80,198],"have":[81,194],"been":[82],"many":[83,89],"below":[86],"20B":[87,140],"parameters,":[88,93],"above":[91],"70B":[92],"but":[94],"scarcity":[96,108],"the":[100,110,119,190],"20-70B":[101],"parameter":[102,111,120],"range.":[103],"refer":[105],"that":[107,197],"as":[109],"gap.":[112],"provide":[114],"some":[115,186],"stylized":[116],"facts":[117],"about":[118],"gap":[121],"propose":[123],"few":[125],"hypotheses":[126],"explain":[128],"it.":[129,175],"explanations":[131],"we":[132,182,192],"favor":[133],"are:":[134],"(a)":[135],"beyond":[139],"parameters":[141],"requires":[142],"adopting":[143],"different":[144],"parallelism":[145],"techniques,":[146],"which":[147],"makes":[148],"mid-sized":[149],"less":[151],"cost-effective,":[152],"(b)":[153],"GPT-3":[154],"was":[155],"one":[156],"order":[157],"larger":[160],"than":[161],"previous":[162],"models,":[164],"researchers":[166],"afterwards":[167],"primarily":[168],"experimented":[169],"bigger":[171],"outperform":[174],"While":[176],"these":[177],"dynamics":[178,204],"likely":[179],"exist,":[180],"believe":[183],"they":[184],"play":[185],"role":[187],"generating":[189],"gap,":[191],"don't":[193],"high":[195],"confidence":[196],"are":[199],"no":[200],"other,":[201],"important":[203],"play.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":8}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
