{"id":"https://openalex.org/W4281481109","doi":"https://doi.org/10.48550/arxiv.2205.10487","title":"Scaling Laws and Interpretability of Learning from Repeated Data","display_name":"Scaling Laws and Interpretability of Learning from Repeated Data","publication_year":2022,"publication_date":"2022-05-21","ids":{"openalex":"https://openalex.org/W4281481109","doi":"https://doi.org/10.48550/arxiv.2205.10487"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2205.10487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.10487","pdf_url":"https://arxiv.org/pdf/2205.10487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2205.10487","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030442997","display_name":"Danny Hernandez","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hernandez, Danny","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074565235","display_name":"Tom Brown","orcid":"https://orcid.org/0000-0002-6538-3036"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brown, Tom","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059073226","display_name":"Tom Conerly","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Conerly, Tom","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072660054","display_name":"Nova DasSarma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"DasSarma, Nova","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059021264","display_name":"Dawn Drain","orcid":"https://orcid.org/0000-0002-6606-4141"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Drain, Dawn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049068767","display_name":"Sheer El-Showk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"El-Showk, Sheer","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020683620","display_name":"Nelson Elhage","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elhage, Nelson","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070402589","display_name":"Zac Hatfield-Dodds","orcid":"https://orcid.org/0000-0002-8646-8362"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hatfield-Dodds, Zac","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049786610","display_name":"Tom Henighan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henighan, Tom","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087030656","display_name":"Tristan Hume","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hume, Tristan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041888075","display_name":"Scott G. Johnston","orcid":"https://orcid.org/0000-0002-5826-5613"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Johnston, Scott","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110949689","display_name":"Ben Mann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mann, Ben","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039751155","display_name":"Chris Olah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olah, Chris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108225540","display_name":"Catherine Olsson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olsson, Catherine","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066197394","display_name":"Dario Amodei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amodei, Dario","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032088236","display_name":"Nicholas Joseph","orcid":"https://orcid.org/0000-0002-1972-0783"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joseph, Nicholas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053213601","display_name":"Jared Kaplan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaplan, Jared","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5054887773","display_name":"Sam McCandlish","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McCandlish, Sam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":["https://openalex.org/A5030442997"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9584000110626221,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9398000240325928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.833187460899353},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6881005167961121},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.48830699920654297},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4654563069343567},{"id":"https://openalex.org/keywords/copying","display_name":"Copying","score":0.44508737325668335},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.4277195334434509},{"id":"https://openalex.org/keywords/repetition","display_name":"Repetition (rhetorical device)","score":0.4267755448818207},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.42230409383773804},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41309696435928345},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.410269558429718},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3604435324668884},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16332679986953735},{"id":"https://openalex.org/keywords/law","display_name":"Law","score":0.12091228365898132}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.833187460899353},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6881005167961121},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.48830699920654297},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4654563069343567},{"id":"https://openalex.org/C2779151265","wikidata":"https://www.wikidata.org/wiki/Q1156791","display_name":"Copying","level":2,"score":0.44508737325668335},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.4277195334434509},{"id":"https://openalex.org/C2776141515","wikidata":"https://www.wikidata.org/wiki/Q1274479","display_name":"Repetition (rhetorical device)","level":2,"score":0.4267755448818207},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.42230409383773804},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41309696435928345},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.410269558429718},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3604435324668884},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16332679986953735},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.12091228365898132},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2205.10487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.10487","pdf_url":"https://arxiv.org/pdf/2205.10487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2205.10487","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2205.10487","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2205.10487","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.10487","pdf_url":"https://arxiv.org/pdf/2205.10487","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4300000071525574},{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4099999964237213}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W4390569940","https://openalex.org/W2888392564","https://openalex.org/W4310278675","https://openalex.org/W2377059580","https://openalex.org/W4388422664","https://openalex.org/W2806259446","https://openalex.org/W4361193272","https://openalex.org/W2963326959","https://openalex.org/W4390187575"],"abstract_inverted_index":{"Recent":[0],"large":[1,194,279,286],"language":[2,280],"models":[3,85,281],"have":[4,51],"been":[5],"trained":[6],"on":[7,13],"vast":[8],"datasets,":[9],"but":[10,93],"also":[11],"often":[12],"repeated":[14,40,59,68,100,112],"data,":[15,25],"either":[16],"intentionally":[17],"for":[18,255,269],"the":[19,35,43,89,160,165,169,181,184,197,205,225,230,256],"purpose":[20],"of":[21,57,84,88,97,126,139,149,159,168,196,207,276],"upweighting":[22],"higher":[23],"quality":[24],"or":[26,46],"unintentionally":[27],"because":[28],"data":[29,41,69,90,113,161,185,236,277],"deduplication":[30],"is":[31,37,91,99,177],"not":[32],"perfect":[33],"and":[34,71,189,200,241],"model":[36,143,153,231],"exposed":[38],"to":[39,66,72,118,130,147,215,222,260,284,288],"at":[42],"sentence,":[44],"paragraph,":[45],"document":[47],"level.":[48],"Some":[49],"works":[50],"reported":[52],"substantial":[53],"negative":[54],"performance":[55,138],"effects":[56,75],"this":[58,62,201],"data.":[60],"In":[61],"paper":[63],"we":[64,80,211],"attempt":[65],"study":[67],"systematically":[70],"understand":[73],"its":[74],"mechanistically.":[76],"To":[77],"do":[78],"this,":[79],"train":[81],"a":[82,94,105,150,178,193,252,267,272],"family":[83],"where":[86,183,204],"most":[87],"unique":[92],"small":[95,274],"fraction":[96,195,275],"it":[98],"many":[101],"times.":[102],"We":[103,174],"find":[104],"strong":[106],"double":[107],"descent":[108],"phenomenon,":[109],"in":[110,134,180,278],"which":[111],"can":[114,144,186],"lead":[115,283],"test":[116],"loss":[117],"increase":[119],"midway":[120],"through":[121],"training.":[122],"A":[123],"predictable":[124],"range":[125,179],"repetition":[127,237],"frequency":[128],"leads":[129],"surprisingly":[131],"severe":[132],"degradation":[133,208],"performance.":[135,289],"For":[136],"instance,":[137],"an":[140],"800M":[141],"parameter":[142],"be":[145,187,203],"degraded":[146],"that":[148,235],"2x":[151],"smaller":[152],"(400M":[154],"params)":[155],"by":[156,229,233],"repeating":[157,271],"0.1%":[158],"100":[162],"times,":[163],"despite":[164],"other":[166],"90%":[167],"training":[170],"tokens":[171],"remaining":[172],"unique.":[173],"suspect":[175],"there":[176],"middle":[182],"memorized":[188],"doing":[190],"so":[191],"consumes":[192],"model's":[198],"capacity,":[199],"may":[202],"peak":[206],"occurs.":[209],"Finally,":[210],"connect":[212],"these":[213,264],"observations":[214],"recent":[216],"mechanistic":[217],"interpretability":[218],"work":[219],"-":[220,232],"attempting":[221],"reverse":[223],"engineer":[224],"detailed":[226],"computations":[227],"performed":[228],"showing":[234],"disproportionately":[238,285],"damages":[239],"copying":[240],"internal":[242],"structures":[243],"associated":[244],"with":[245],"generalization,":[246],"such":[247],"as":[248],"induction":[249],"heads,":[250],"providing":[251],"possible":[253],"mechanism":[254],"shift":[257],"from":[258],"generalization":[259],"memorization.":[261],"Taken":[262],"together,":[263],"results":[265],"provide":[266],"hypothesis":[268],"why":[270],"relatively":[273],"could":[282],"harms":[287]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":14}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
