{"id":"https://openalex.org/W7119126488","doi":"https://doi.org/10.48550/arxiv.2601.00923","title":"Context Collapse: In-Context Learning and Model Collapse","display_name":"Context Collapse: In-Context Learning and Model Collapse","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7119126488","doi":"https://doi.org/10.48550/arxiv.2601.00923"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.00923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.00923","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056859452","display_name":"Josef Ott","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ott, Josef","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5056859452"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.1624000072479248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.1624000072479248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.1039000004529953,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.09189999848604202,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/linear-regression","display_name":"Linear regression","score":0.44780001044273376},{"id":"https://openalex.org/keywords/martingale","display_name":"Martingale (probability theory)","score":0.42410001158714294},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.39820000529289246},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.3939000070095062},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.37790000438690186},{"id":"https://openalex.org/keywords/linear-model","display_name":"Linear model","score":0.3653999865055084},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.3635999858379364}],"concepts":[{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5163000226020813},{"id":"https://openalex.org/C48921125","wikidata":"https://www.wikidata.org/wiki/Q10861030","display_name":"Linear regression","level":2,"score":0.44780001044273376},{"id":"https://openalex.org/C48406656","wikidata":"https://www.wikidata.org/wiki/Q534112","display_name":"Martingale (probability theory)","level":2,"score":0.42410001158714294},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.39820000529289246},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3813000023365021},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.37790000438690186},{"id":"https://openalex.org/C163175372","wikidata":"https://www.wikidata.org/wiki/Q3339222","display_name":"Linear model","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.3635999858379364},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.33809998631477356},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3280999958515167},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.30379998683929443},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C121194460","wikidata":"https://www.wikidata.org/wiki/Q856741","display_name":"Random walk","level":2,"score":0.2883000075817108},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2773999869823456},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27630001306533813}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.00923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.00923","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.00923","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,84,169],"thesis":[1],"investigates":[2],"two":[3],"key":[4],"phenomena":[5],"in":[6,20,44,166,180],"large":[7],"language":[8],"models":[9],"(LLMs):":[10],"in-context":[11,37],"learning":[12],"(ICL)":[13],"and":[14,32,78,104,115,122],"model":[15,99],"collapse.":[16],"We":[17,59,126],"study":[18],"ICL":[19,175],"a":[21,41,49,56,87,92,158],"linear":[22,29,69,113],"transformer":[23,70],"with":[24,176],"tied":[25],"weights":[26],"trained":[27],"on":[28],"regression":[30,114],"tasks,":[31],"show":[33],"that":[34,136],"minimising":[35],"the":[36,45,53,64,68,81,95,140,153,172],"loss":[38],"leads":[39],"to":[40,74,108],"phase":[42],"transition":[43],"learned":[46],"parameters.":[47],"Above":[48],"critical":[50],"context":[51,156,161],"length,":[52],"solution":[54],"develops":[55],"skew-symmetric":[57,88],"component.":[58],"prove":[60],"this":[61],"by":[62,130],"reducing":[63],"forward":[65],"pass":[66],"of":[67,94,155,160,174],"under":[71,119],"weight":[72],"tying":[73],"preconditioned":[75],"gradient":[76,96],"descent,":[77],"then":[79],"analysing":[80],"optimal":[82],"preconditioner.":[83],"preconditioner":[85],"includes":[86],"component,":[89],"which":[90],"induces":[91],"rotation":[93],"direction.":[97],"For":[98],"collapse,":[100],"we":[101,151],"use":[102],"martingale":[103],"random":[105],"walk":[106],"theory":[107],"analyse":[109],"simplified":[110],"settings":[111],"-":[112,118],"Gaussian":[116],"fitting":[117],"both":[120],"replacing":[121],"cumulative":[123],"data":[124,141],"regimes.":[125],"strengthen":[127],"existing":[128],"results":[129],"proving":[131],"almost":[132],"sure":[133],"convergence,":[134],"showing":[135],"collapse":[137],"occurs":[138],"unless":[139],"grows":[142],"sufficiently":[143],"fast":[144],"or":[145],"is":[146],"retained":[147],"over":[148],"time.":[149],"Finally,":[150],"introduce":[152],"notion":[154],"collapse:":[157],"degradation":[159],"during":[162],"long":[163],"generations,":[164],"especially":[165],"chain-of-thought":[167],"reasoning.":[168],"concept":[170],"links":[171],"dynamics":[173],"long-term":[177],"stability":[178],"challenges":[179],"generative":[181],"models.":[182]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
