{"id":"https://openalex.org/W7160633226","doi":"https://doi.org/10.48550/arxiv.2605.06152","title":"Grokking or Glitching? How Low-Precision Drives Slingshot Loss Spikes","display_name":"Grokking or Glitching? How Low-Precision Drives Slingshot Loss Spikes","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160633226","doi":"https://doi.org/10.48550/arxiv.2605.06152"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06152","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013844321","display_name":"\u5218\u6c57\u9752 Liu Hanqing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hanqing, Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103035568","display_name":"Jianjun Cao","orcid":"https://orcid.org/0000-0002-4281-8324"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Jianjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018596311","display_name":"Yuanze Li","orcid":"https://orcid.org/0000-0002-5060-4457"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yuanze","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135725843","display_name":"Zijian Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zijian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.6177999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.6177999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.17339999973773956,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.029500000178813934,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5508000254631042},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.47110000252723694},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.444599986076355},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.435699999332428},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.42100000381469727},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.3693999946117401},{"id":"https://openalex.org/keywords/feedback-loop","display_name":"Feedback loop","score":0.35030001401901245},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.33980000019073486}],"concepts":[{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5508000254631042},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.48019999265670776},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.47110000252723694},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.444599986076355},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.435699999332428},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.42100000381469727},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4106999933719635},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C186886427","wikidata":"https://www.wikidata.org/wiki/Q5441213","display_name":"Feedback loop","level":2,"score":0.35030001401901245},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.34769999980926514},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.3402999937534332},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C191795146","wikidata":"https://www.wikidata.org/wiki/Q3878446","display_name":"Norm (philosophy)","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.3334999978542328},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.32919999957084656},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.32710000872612},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2840000092983246},{"id":"https://openalex.org/C140331021","wikidata":"https://www.wikidata.org/wiki/Q1868104","display_name":"Logit","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2718999981880188},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C180802074","wikidata":"https://www.wikidata.org/wiki/Q1052379","display_name":"Logistic function","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06152","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06152","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.510280966758728}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"exhibit":[3],"periodic":[4],"loss":[5,166,179],"spikes":[6],"during":[7,69],"unregularized":[8],"long-term":[9],"training,":[10,217],"a":[11,40,50,102,119,155,212,220],"phenomenon":[12,38],"known":[13],"as":[14,211],"the":[15,53,56,60,65,71,74,83,86,93,106,110,124,127,132,150,158,164,197],"\"Slingshot":[16],"Mechanism.\"":[17],"Existing":[18],"work":[19],"usually":[20],"attributes":[21],"this":[22,37,116,141],"to":[23,80,136,176],"intrinsic":[24],"optimization":[25],"dynamics,":[26],"but":[27,192],"its":[28],"triggering":[29],"mechanism":[30,142,148],"remains":[31],"unclear.":[32],"This":[33,91,147],"paper":[34],"proves":[35],"that":[36,115,171],"is":[39,77,173],"result":[41],"of":[42,73,85,96,109,161,204,215],"floating-point":[43],"arithmetic":[44],"precision":[45],"limits.":[46],"As":[47],"training":[48],"enters":[49],"high-confidence":[51],"stage,":[52],"difference":[54],"between":[55],"correct-class":[57],"logit":[58,228],"and":[59,100,131,163,200,218,227],"other":[61],"logits":[62],"may":[63,187],"exceed":[64],"absorption-error":[66],"threshold.":[67],"Then":[68],"backpropagation,":[70],"gradient":[72],"correct":[75],"class":[76],"rounded":[78],"exactly":[79],"zero,":[81],"while":[82],"gradients":[84,97],"incorrect":[87],"classes":[88,99],"remain":[89],"nonzero.":[90],"breaks":[92],"zero-sum":[94,198],"constraint":[95,199],"across":[98],"introduces":[101],"systematic":[103],"drift":[104,117],"in":[105,181,230],"parameter":[107,205,225],"update":[108],"classifier":[111,129],"layer.":[112],"We":[113,139,168],"prove":[114],"forms":[118],"positive":[120],"feedback":[121],"loop":[122],"with":[123],"feature,":[125],"causing":[126],"global":[128,133],"mean":[130,135],"feature":[134],"grow":[137],"exponentially.":[138],"call":[140],"Numerical":[143],"Feature":[144],"Inflation":[145],"(NFI).":[146],"explains":[149],"rapid":[151,202],"norm":[152],"growth":[153,203,226],"before":[154],"Slingshot":[156,210],"spike,":[157],"subsequent":[159],"reappearance":[160],"gradients,":[162],"resulting":[165],"spike.":[167],"further":[169],"show":[170],"NFI":[172],"not":[174,188],"equivalent":[175],"an":[177],"observed":[178],"spike:":[180],"more":[182],"practical":[183],"tasks,":[184],"partial":[185],"absorption":[186],"produce":[189],"visible":[190],"spikes,":[191],"it":[193],"can":[194],"still":[195],"break":[196],"drive":[201],"norms.":[206],"Our":[207],"results":[208],"reinterpret":[209],"numerical":[213],"dynamic":[214],"finite-precision":[216],"provide":[219],"testable":[221],"explanation":[222],"for":[223],"abnormal":[224],"divergence":[229],"late-stage":[231],"training.":[232]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-09T00:00:00"}
