{"id":"https://openalex.org/W4405095648","doi":"https://doi.org/10.48550/arxiv.2412.04403","title":"Establishing Task Scaling Laws via Compute-Efficient Model Ladders","display_name":"Establishing Task Scaling Laws via Compute-Efficient Model Ladders","publication_year":2024,"publication_date":"2024-12-05","ids":{"openalex":"https://openalex.org/W4405095648","doi":"https://doi.org/10.48550/arxiv.2412.04403"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2412.04403","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.04403","pdf_url":"https://arxiv.org/pdf/2412.04403","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.04403","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041892782","display_name":"Akshita Bhagia","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bhagia, Akshita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001228471","display_name":"Jiacheng Liu","orcid":"https://orcid.org/0000-0002-3807-3817"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jiacheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003423990","display_name":"Alexander Wettig","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wettig, Alexander","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115024195","display_name":"David Heineman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heineman, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071250061","display_name":"Oyvind Tafjord","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tafjord, Oyvind","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073589706","display_name":"Ananya Harsh Jha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jha, Ananya Harsh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060844217","display_name":"Luca Soldaini","orcid":"https://orcid.org/0000-0001-6998-9863"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soldaini, Luca","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088517824","display_name":"Noah A. Smith","orcid":"https://orcid.org/0000-0002-2310-6380"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smith, Noah A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059265033","display_name":"Dirk Groeneveld","orcid":"https://orcid.org/0000-0002-8274-768X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Groeneveld, Dirk","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079904764","display_name":"Pang Wei Koh","orcid":"https://orcid.org/0000-0003-4330-6969"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koh, Pang Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008013895","display_name":"Jesse Dodge","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dodge, Jesse","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5082305994","display_name":"Hannaneh Hajishirzi","orcid":"https://orcid.org/0000-0002-1055-6657"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hajishirzi, Hannaneh","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5041892782"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9427000284194946,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9427000284194946,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling-law","display_name":"Scaling law","score":0.7569502592086792},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6796722412109375},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5885968208312988},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49384766817092896},{"id":"https://openalex.org/keywords/law","display_name":"Law","score":0.3798808455467224},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.264701247215271},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2226395606994629},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.1520693600177765},{"id":"https://openalex.org/keywords/management","display_name":"Management","score":0.07482513785362244},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.06584149599075317}],"concepts":[{"id":"https://openalex.org/C2988430800","wikidata":"https://www.wikidata.org/wiki/Q428971","display_name":"Scaling law","level":3,"score":0.7569502592086792},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6796722412109375},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5885968208312988},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49384766817092896},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.3798808455467224},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.264701247215271},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2226395606994629},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.1520693600177765},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.07482513785362244},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.06584149599075317}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2412.04403","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.04403","pdf_url":"https://arxiv.org/pdf/2412.04403","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.04403","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.04403","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.04403","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.04403","pdf_url":"https://arxiv.org/pdf/2412.04403","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4405095648.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2117748264","https://openalex.org/W4298190863","https://openalex.org/W2518229301","https://openalex.org/W2597205018","https://openalex.org/W2554072523","https://openalex.org/W1671671741","https://openalex.org/W2952002823","https://openalex.org/W2013329450","https://openalex.org/W3104909087","https://openalex.org/W2512421151"],"abstract_inverted_index":{"We":[0,61,142,160],"develop":[1],"task":[2,12,33,59],"scaling":[3],"laws":[4,25],"and":[5,45,82,96,169,179],"model":[6,32,44,91,99,158],"ladders":[7],"to":[8,48,57,72,93,101,176],"predict":[9,49,58,129],"the":[10,20,74,78,105,112,116,130,155],"individual":[11],"performance":[13],"of":[14,65,77,111,132,139],"pretrained":[15],"language":[16,27],"models":[17,107,135,178],"(LMs)":[18],"in":[19,154],"overtrained":[21],"setting.":[22],"Standard":[23],"power":[24],"for":[26,85,115,166,172],"modeling":[28],"loss":[29],"cannot":[30],"accurately":[31],"performance.":[34,60],"Therefore,":[35],"we":[36,127],"leverage":[37],"a":[38,63,89,97],"two-step":[39],"prediction":[40,80,148],"approach:":[41],"(1)":[42],"use":[43,55],"data":[46,70],"size":[47],"an":[50],"intermediate":[51],"loss,":[52],"then":[53],"(2)":[54],"it":[56],"train":[62],"set":[64],"small-scale":[66],"\"ladder\"":[67],"models,":[68],"collect":[69],"points":[71,138],"fit":[73],"parameterized":[75],"functions":[76],"two":[79,86],"steps,":[81],"make":[83],"predictions":[84],"target":[87,117,134],"models:":[88],"7B":[90],"trained":[92,100],"4T":[94],"tokens":[95],"13B":[98],"5T":[102],"tokens.":[103],"Training":[104],"ladder":[106],"only":[108],"costs":[109],"1%":[110],"compute":[113],"used":[114],"models.":[118],"On":[119],"four":[120],"multiple-choice":[121],"tasks":[122,145],"formatted":[123],"as":[124],"ranked":[125],"classification,":[126],"can":[128],"accuracy":[131],"both":[133],"within":[136],"2":[137],"absolute":[140],"error.":[141],"find":[143],"that":[144],"with":[146],"higher":[147,152],"error":[149],"also":[150,161],"have":[151],"variance":[153],"metrics":[156],"over":[157],"checkpoints.":[159],"contrast":[162],"multiple":[163],"design":[164],"choices":[165],"predicting":[167],"accuracy,":[168],"present":[170],"recommendations":[171],"extending":[173],"our":[174],"method":[175],"new":[177],"tasks.":[180]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
