{"id":"https://openalex.org/W4415331714","doi":"https://doi.org/10.48550/arxiv.2509.22947","title":"Induction Signatures Are Not Enough: A Matched-Compute Study of Load-Bearing Structure in In-Context Learning","display_name":"Induction Signatures Are Not Enough: A Matched-Compute Study of Load-Bearing Structure in In-Context Learning","publication_year":2025,"publication_date":"2025-09-26","ids":{"openalex":"https://openalex.org/W4415331714","doi":"https://doi.org/10.48550/arxiv.2509.22947"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2509.22947","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.22947","pdf_url":"https://arxiv.org/pdf/2509.22947","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.22947","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109865286","display_name":"Mohammed Sabry","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sabry, Mohammed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5010118897","display_name":"Anja Belz","orcid":"https://orcid.org/0000-0002-0552-8096"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Belz, Anya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5109865286"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9010999798774719,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.6758000254631042},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5630000233650208},{"id":"https://openalex.org/keywords/rule-induction","display_name":"Rule induction","score":0.40630000829696655},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.40610000491142273},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3440999984741211},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3303000032901764}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.6758000254631042},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5764999985694885},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5630000233650208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45739999413490295},{"id":"https://openalex.org/C2776780472","wikidata":"https://www.wikidata.org/wiki/Q7378945","display_name":"Rule induction","level":2,"score":0.40630000829696655},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3718000054359436},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3440999984741211},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3345000147819519},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C134121241","wikidata":"https://www.wikidata.org/wiki/Q899301","display_name":"Yield (engineering)","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.27549999952316284},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2509.22947","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.22947","pdf_url":"https://arxiv.org/pdf/2509.22947","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.22947","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.22947","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.22947","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.22947","pdf_url":"https://arxiv.org/pdf/2509.22947","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415331714.pdf","grobid_xml":"https://content.openalex.org/works/W4415331714.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Mechanism-targeted":[0],"synthetic":[1,225],"data":[2,41,226],"is":[3,115,204,209],"increasingly":[4],"proposed":[5],"as":[6,58,91,213],"a":[7,39,50,59,63,92,144,151,207],"way":[8],"to":[9,119,193],"steer":[10],"pretraining":[11,52],"toward":[12],"desirable":[13],"capabilities,":[14],"but":[15,99,236],"it":[16,215],"remains":[17],"unclear":[18],"how":[19],"such":[20],"interventions":[21,227],"should":[22,228],"be":[23,229],"evaluated.":[24],"We":[25],"study":[26],"this":[27,100,222],"question":[28],"for":[29],"in-context":[30],"learning":[31],"(ICL)":[32],"under":[33],"matched":[34,166],"compute":[35],"(iso-FLOPs)":[36],"using":[37],"Bi-Induct,":[38],"lightweight":[40],"rewrite":[42],"that":[43,181,205,224],"interleaves":[44],"short":[45],"directional":[46,60],"copy":[47,85],"snippets":[48],"into":[49,104],"natural":[51],"stream:":[53],"forward-copy":[54],"(induction),":[55],"backward-copy":[56,134],"(anti-induction,":[57],"control),":[61],"or":[62],"balanced":[64],"mix.":[65],"Across":[66],"0.13B-1B":[67],"decoder-only":[68],"models,":[69],"we":[70],"evaluate":[71],"(i)":[72],"few-shot":[73,108],"performance":[74],"on":[75,110,123],"standard":[76,111],"LM":[77,112],"benchmarks":[78],"and":[79,87,197],"function-style":[80,124],"ICL":[81,163],"probes,":[82],"(ii)":[83],"head-level":[84],"telemetry,":[86],"(iii)":[88],"held-out":[89],"perplexity":[90],"guardrail.":[93],"Bi-Induct":[94,114,191],"reliably":[95],"increases":[96],"induction-head":[97],"activity,":[98],"does":[101],"not":[102,210,231],"translate":[103],"consistent":[105],"improvements":[106],"in":[107,175],"generalization:":[109],"benchmarks,":[113],"largely":[116],"performance-neutral":[117],"relative":[118,172],"natural-only":[120,128,177,182],"training,":[121],"while":[122,244],"probes":[125],"the":[126,155,170,176,211],"1B":[127],"model":[129,220],"performs":[130],"best.":[131],"Despite":[132],"explicit":[133],"cues,":[135],"anti-induction":[136],"scores":[137],"remain":[138],"near":[139],"zero":[140],"across":[141],"scales,":[142],"revealing":[143],"strong":[145],"forward/backward":[146],"asymmetry.":[147],"Targeted":[148],"ablations":[149],"show":[150],"sharper":[152],"distinction:":[153],"removing":[154],"top":[156],"2%":[157],"induction":[158,188,199],"heads":[159],"per":[160],"layer":[161],"harms":[162],"more":[164,185,195],"than":[165],"random":[167],"ablations,":[168],"with":[169],"largest":[171],"drop":[173],"occurring":[174],"models.":[178],"This":[179],"indicates":[180],"training":[183],"produces":[184],"centralized,":[186],"load-bearing":[187],"circuitry,":[189],"whereas":[190],"tends":[192],"create":[194,240],"distributed":[196],"redundant":[198],"activity.":[200],"Our":[201],"main":[202],"conclusion":[203],"eliciting":[206],"mechanism":[208],"same":[212],"making":[214],"load-bearing.":[216],"For":[217],"data-centric":[218],"foundation":[219],"design,":[221],"suggests":[223],"evaluated":[230],"only":[232],"by":[233,237],"signature":[234],"amplification,":[235],"whether":[238],"they":[239],"causally":[241],"necessary":[242],"computation":[243],"preserving":[245],"natural-data":[246],"modeling":[247],"quality.":[248]},"counts_by_year":[],"updated_date":"2026-06-05T09:01:59.212387","created_date":"2025-10-19T00:00:00"}
