{"id":"https://openalex.org/W7160890010","doi":"https://doi.org/10.48550/arxiv.2605.08301","title":"Priming: Hybrid State Space Models From Pre-trained Transformers","display_name":"Priming: Hybrid State Space Models From Pre-trained Transformers","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160890010","doi":"https://doi.org/10.48550/arxiv.2605.08301"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08301","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08301","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08301","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135977802","display_name":"Aditya Chattopadhyay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chattopadhyay, Aditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135934944","display_name":"Elvis Nunez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nunez, Elvis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006661300","display_name":"Prannay Kaul","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaul, Prannay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135958702","display_name":"Benjamin Bowman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bowman, Benjamin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135987055","display_name":"Evan Becker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Becker, Evan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036782268","display_name":"Luca Zancato","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zancato, Luca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135971058","display_name":"David Thomas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135927468","display_name":"Wei Xia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135991555","display_name":"Stefano Soatto","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soatto, Stefano","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08020000159740448,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.08020000159740448,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07890000194311142,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0722000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6309000253677368},{"id":"https://openalex.org/keywords/priming","display_name":"Priming (agriculture)","score":0.5174999833106995},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5109000205993652},{"id":"https://openalex.org/keywords/hybrid-system","display_name":"Hybrid system","score":0.43140000104904175},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3792000114917755}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6638000011444092},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6309000253677368},{"id":"https://openalex.org/C81444415","wikidata":"https://www.wikidata.org/wiki/Q7243535","display_name":"Priming (agriculture)","level":3,"score":0.5174999833106995},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5109000205993652},{"id":"https://openalex.org/C50897621","wikidata":"https://www.wikidata.org/wiki/Q2665508","display_name":"Hybrid system","level":2,"score":0.43140000104904175},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3361000120639801},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2808000147342682},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.2606000006198883}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08301","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08301","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08301","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08301","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Hybrid":[0,59,74,89,191,228,237],"State-Space":[1,7],"models":[2,185,238],"combine":[3],"Attention":[4,15],"with":[5,16,33,186,246],"recurrent":[6],"Model":[8],"(SSM)":[9],"layers,":[10],"balancing":[11],"eidetic":[12],"memory":[13,19],"from":[14,20,50,77,91],"compressed":[17],"fading":[18],"SSMs.":[21],"This":[22],"yields":[23],"smaller":[24],"Key-Value":[25],"caches":[26],"and":[27,99,133,162,164,216,242,250,262],"faster":[28],"decoding":[29],"than":[30,107],"Transformers,":[31],"along":[32],"a":[34,52,62,70,78,82,88,92,209,232],"richer":[35],"architectural":[36],"design":[37,41,76],"space.":[38],"Exploring":[39],"that":[40,54,72,166],"space":[42],"at":[43,149],"scale":[44,150,180],"has":[45,55],"so":[46],"far":[47],"required":[48],"training":[49,249],"scratch,":[51],"barrier":[53],"kept":[56],"most":[57],"large-model":[58],"research":[60,226],"within":[61,206],"narrow":[63],"range":[64],"of":[65,109,145,208,235],"architectures.":[66],"We":[67,154,179],"introduce":[68],"Priming,":[69],"method":[71],"turns":[73],"architecture":[75],"pre-training":[79,113],"problem":[80],"into":[81],"knowledge":[83],"transfer":[84],"one.":[85],"Priming":[86,116,136,181,248],"initializes":[87],"model":[90,128,134,233],"pre-trained":[93],"Transformer":[94,122,210],"and,":[95],"through":[96],"short":[97],"alignment":[98],"post-training":[100],"phases,":[101],"recovers":[102],"downstream":[103,173],"quality":[104],"using":[105],"less":[106],"0.5%":[108],"the":[110,120,141,213,247],"source":[111,121,197],"model's":[112],"token":[114],"budget.":[115],"is":[117],"agnostic":[118],"to":[119,139,182,219],"family":[123],"(e.g.,":[124],"Qwen,":[125],"Llama,":[126],"Mistral),":[127],"class":[129],"(dense":[130],"or":[131],"Mixture-of-Experts),":[132],"scale.":[135],"enables":[137],"us":[138],"run":[140],"first":[142],"controlled":[143],"comparison":[144],"SSM":[146],"layer":[147],"types":[148],"under":[151,267],"identical":[152],"conditions.":[153],"evaluate,":[155],"Gated":[156,159],"KalmaNet":[157],"(GKA),":[158],"DeltaNet":[160],"(GDN),":[161],"Mamba-2,":[163],"show":[165],"their":[167],"expressiveness":[168],"hierarchy,":[169],"GKA&gt;GDN&gt;Mamba-2,":[170],"directly":[171],"predicts":[172],"performance":[174],"on":[175,212,227],"long-context":[176,240,257],"reasoning":[177,184,202,241],"tasks.":[178],"8B/32B":[183],"native":[187],"128K":[188],"contexts.":[189],"Our":[190],"GKA":[192,260],"32B":[193],"improves":[194],"over":[195],"its":[196],"Qwen3-32B":[198],"by":[199],"+3.8":[200],"average":[201],"points,":[203],"while":[204],"staying":[205],"1%":[207],"post-trained":[211],"same":[214],"data":[215],"enabling":[217],"up":[218],"2.3x":[220],"higher":[221],"decode":[222],"throughput.":[223],"To":[224],"foster":[225],"architectures,":[229],"we":[230],"release":[231],"zoo":[234],"primed":[236],"for":[239,256],"instruction":[243],"following,":[244],"together":[245],"inference":[251],"code":[252],"(Sequence":[253],"Parallelism":[254],"algorithms":[255],"training,":[258],"optimized":[259],"kernels,":[261],"vLLM":[263],"serving":[264],"plugin),":[265],"all":[266],"Apache~2.0":[268],"License.":[269]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
