{"id":"https://openalex.org/W4393924649","doi":"https://doi.org/10.48550/arxiv.2404.01413","title":"Is Model Collapse Inevitable? Breaking the Curse of Recursion by Accumulating Real and Synthetic Data","display_name":"Is Model Collapse Inevitable? Breaking the Curse of Recursion by Accumulating Real and Synthetic Data","publication_year":2024,"publication_date":"2024-04-01","ids":{"openalex":"https://openalex.org/W4393924649","doi":"https://doi.org/10.48550/arxiv.2404.01413"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2404.01413","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.01413","pdf_url":"https://arxiv.org/pdf/2404.01413","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.01413","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062440568","display_name":"Matthias Gerstgrasser","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gerstgrasser, Matthias","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058188131","display_name":"Rylan Schaeffer","orcid":"https://orcid.org/0000-0002-4298-7216"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schaeffer, Rylan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102621964","display_name":"Apratim Dey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dey, Apratim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028522433","display_name":"Rafael Rafailov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rafailov, Rafael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094579744","display_name":"Henry Sleight","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sleight, Henry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114143509","display_name":"John D. Hughes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hughes, John","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002211679","display_name":"Tomasz Korbak","orcid":"https://orcid.org/0000-0002-6258-2013"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Korbak, Tomasz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015545817","display_name":"Rajashree Agrawal","orcid":"https://orcid.org/0000-0001-7617-9180"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agrawal, Rajashree","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091826088","display_name":"Dhruv Pai","orcid":"https://orcid.org/0000-0002-6665-9903"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pai, Dhruv","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103249356","display_name":"Andrey Gromov","orcid":"https://orcid.org/0000-0002-5962-7225"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gromov, Andrey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111402907","display_name":"Daniel A. Roberts","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roberts, Daniel A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089413311","display_name":"Diyi Yang","orcid":"https://orcid.org/0000-0003-1220-3983"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Diyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111161797","display_name":"David L. Donoho","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Donoho, David L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5091266570","display_name":"Sanmi Koyejo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koyejo, Sanmi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5062440568"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.48489999771118164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.48489999771118164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recursion","display_name":"Recursion (computer science)","score":0.7420880198478699},{"id":"https://openalex.org/keywords/curse","display_name":"Curse","score":0.5758700966835022},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.40953025221824646},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.3290638327598572},{"id":"https://openalex.org/keywords/theoretical-physics","display_name":"Theoretical physics","score":0.32652997970581055},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.23846480250358582},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23677173256874084},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.13570541143417358}],"concepts":[{"id":"https://openalex.org/C168773036","wikidata":"https://www.wikidata.org/wiki/Q264164","display_name":"Recursion (computer science)","level":2,"score":0.7420880198478699},{"id":"https://openalex.org/C2780273121","wikidata":"https://www.wikidata.org/wiki/Q109411","display_name":"Curse","level":2,"score":0.5758700966835022},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40953025221824646},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.3290638327598572},{"id":"https://openalex.org/C33332235","wikidata":"https://www.wikidata.org/wiki/Q18362","display_name":"Theoretical physics","level":1,"score":0.32652997970581055},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.23846480250358582},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23677173256874084},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.13570541143417358},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2404.01413","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.01413","pdf_url":"https://arxiv.org/pdf/2404.01413","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2404.01413","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2404.01413","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2404.01413","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2404.01413","pdf_url":"https://arxiv.org/pdf/2404.01413","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G101112439","display_name":null,"funder_award_id":"Fellow","funder_id":"https://openalex.org/F4320306151","funder_display_name":"Alfred P. Sloan Foundation"},{"id":"https://openalex.org/G1060596441","display_name":"CAREER:  Geometric Quantum Order: Fractons, Tensor Gauge Theories and Beyond","funder_award_id":"2045181","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1171700966","display_name":null,"funder_award_id":"NSF CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2004883221","display_name":null,"funder_award_id":"PHY-2019786","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2926720356","display_name":null,"funder_award_id":"32799","funder_id":"https://openalex.org/F4320332299","funder_display_name":"National Institute of Food and Agriculture"},{"id":"https://openalex.org/G3282844518","display_name":null,"funder_award_id":"1934986","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3509299210","display_name":null,"funder_award_id":"2019786","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5142427584","display_name":null,"funder_award_id":"2046795","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5620962805","display_name":null,"funder_award_id":"67021","funder_id":"https://openalex.org/F4320332299","funder_display_name":"National Institute of Food and Agriculture"},{"id":"https://openalex.org/G6393730823","display_name":null,"funder_award_id":"1909577","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6755165505","display_name":null,"funder_award_id":"award","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6770826516","display_name":null,"funder_award_id":"2020-67021-32799","funder_id":"https://openalex.org/F4320332299","funder_display_name":"National Institute of Food and Agriculture"},{"id":"https://openalex.org/G6894402473","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7717910730","display_name":null,"funder_award_id":"DMR-2045181","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8609623916","display_name":null,"funder_award_id":"CCF 1934986","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320332299","display_name":"National Institute of Food and Agriculture","ror":"https://ror.org/05qx3fv49"},{"id":"https://openalex.org/F4320337367","display_name":"Division of Materials Research","ror":"https://ror.org/01pc7k308"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4393924649.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W4386146136","https://openalex.org/W1561349031","https://openalex.org/W4251206704","https://openalex.org/W3206079743","https://openalex.org/W2039478586","https://openalex.org/W2528559906","https://openalex.org/W2005044196","https://openalex.org/W1485007142","https://openalex.org/W2313412377"],"abstract_inverted_index":{"The":[0],"proliferation":[1],"of":[2,107,139,156,173,211,240,263,266],"generative":[3,168],"models,":[4],"combined":[5],"with":[6,50,237],"pretraining":[7,105],"on":[8,22,96,110,170],"web-scale":[9],"data,":[10],"raises":[11],"a":[12,40,154,209,258],"timely":[13],"question:":[14],"what":[15,90],"happens":[16],"when":[17],"these":[18,150],"models":[19,57,109,169,177,213],"are":[20,214,231],"trained":[21],"their":[23],"own":[24],"generated":[25],"outputs?":[26],"Recent":[27],"investigations":[28],"into":[29],"model-data":[30,52],"feedback":[31,53],"loops":[32,36],"proposed":[33],"that":[34,65,80,115,134,228,249],"such":[35],"would":[37],"lead":[38],"to":[39,216,226,247],"phenomenon":[41],"termed":[42],"model":[43,97,130,148,157,195,269],"collapse,":[44,131,196],"under":[45],"which":[46,208],"performance":[47],"progressively":[48],"degrades":[49],"each":[51,122],"iteration":[54],"until":[55],"fitted":[56],"become":[58],"useless.":[59],"However,":[60],"those":[61],"studies":[62],"largely":[63],"assumed":[64],"new":[66],"data":[67,70,81,94,120,125,141,146,192,230,251],"replace":[68],"old":[69],"over":[71,83],"time,":[72],"where":[73],"an":[74,199],"arguably":[75],"more":[76],"realistic":[77],"assumption":[78],"is":[79],"accumulate":[82],"time.":[84],"In":[85],"this":[86,102,224,245],"paper,":[87],"we":[88,197,243],"ask:":[89],"effect":[91],"does":[92,126],"accumulating":[93,135,191],"have":[95],"collapse?":[98],"We":[99,113,162],"empirically":[100],"study":[101],"question":[103],"by":[104,121,204],"sequences":[106],"language":[108],"text":[111],"corpora.":[112],"confirm":[114],"replacing":[116],"the":[117,136,143,217,233,238,254,264],"original":[118,144],"real":[119,145,174],"generation's":[123],"synthetic":[124,140],"indeed":[127],"tend":[128],"towards":[129],"then":[132],"demonstrate":[133],"successive":[137],"generations":[138],"alongside":[142],"avoids":[147],"collapse;":[149],"results":[151,165],"hold":[152],"across":[153],"range":[155],"sizes,":[158],"architectures,":[159],"and":[160,182],"hyperparameters.":[161],"obtain":[163],"similar":[164],"for":[166,178,185],"deep":[167],"other":[171],"types":[172],"data:":[175],"diffusion":[176],"molecule":[179],"conformation":[180],"generation":[181],"variational":[183],"autoencoders":[184],"image":[186],"generation.":[187],"To":[188],"understand":[189],"why":[190],"can":[193],"avoid":[194],"use":[198],"analytically":[200],"tractable":[201],"framework":[202,225],"introduced":[203],"prior":[205],"work":[206,222],"in":[207],"sequence":[210],"linear":[212],"fit":[215],"previous":[218],"models'":[219],"outputs.":[220],"Previous":[221],"used":[223],"show":[227],"if":[229,250],"replaced,":[232],"test":[234,255],"error":[235,256],"increases":[236],"number":[239,265],"model-fitting":[241],"iterations;":[242],"extend":[244],"argument":[246],"prove":[248],"instead":[252],"accumulate,":[253],"has":[257],"finite":[259],"upper":[260],"bound":[261],"independent":[262],"iterations,":[267],"meaning":[268],"collapse":[270],"no":[271],"longer":[272],"occurs.":[273]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-20T07:46:08.049788","created_date":"2024-04-05T00:00:00"}
