{"id":"https://openalex.org/W7133206309","doi":"https://doi.org/10.48550/arxiv.2602.24281","title":"Memory Caching: RNNs with Growing Memory","display_name":"Memory Caching: RNNs with Growing Memory","publication_year":2026,"publication_date":"2026-02-27","ids":{"openalex":"https://openalex.org/W7133206309","doi":"https://doi.org/10.48550/arxiv.2602.24281"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.24281","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066381651","display_name":"Ali Behrouz","orcid":"https://orcid.org/0000-0002-4934-669X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Behrouz, Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104136586","display_name":"Zeman Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zeman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100311577","display_name":"Yuan Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Yuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127869223","display_name":"Peilin Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Peilin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041237539","display_name":"Meisam Razaviyayn","orcid":"https://orcid.org/0000-0003-4342-6661"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Razaviyayn, Meisam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5075598023","display_name":"Vahab Mirrokni","orcid":"https://orcid.org/0000-0001-6705-5629"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mirrokni, Vahab","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7166000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7166000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.025100000202655792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.019999999552965164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.6000999808311462},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5928999781608582},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5291000008583069},{"id":"https://openalex.org/keywords/flat-memory-model","display_name":"Flat memory model","score":0.43470001220703125},{"id":"https://openalex.org/keywords/interleaved-memory","display_name":"Interleaved memory","score":0.39070001244544983},{"id":"https://openalex.org/keywords/dynamic-random-access-memory","display_name":"Dynamic random-access memory","score":0.38179999589920044},{"id":"https://openalex.org/keywords/human-memory","display_name":"Human memory","score":0.3646000027656555},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.34130001068115234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7799999713897705},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.6000999808311462},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5928999781608582},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5291000008583069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45159998536109924},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.43470001220703125},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.39070001244544983},{"id":"https://openalex.org/C118702147","wikidata":"https://www.wikidata.org/wiki/Q189396","display_name":"Dynamic random-access memory","level":3,"score":0.38179999589920044},{"id":"https://openalex.org/C2985957978","wikidata":"https://www.wikidata.org/wiki/Q492","display_name":"Human memory","level":3,"score":0.3646000027656555},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.34130001068115234},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C40506919","wikidata":"https://www.wikidata.org/wiki/Q7452469","display_name":"Sequence learning","level":2,"score":0.3190000057220459},{"id":"https://openalex.org/C133488467","wikidata":"https://www.wikidata.org/wiki/Q6673524","display_name":"Long short term memory","level":4,"score":0.31520000100135803},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.31369999051094055},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.31299999356269836},{"id":"https://openalex.org/C76679254","wikidata":"https://www.wikidata.org/wiki/Q5165163","display_name":"Context-dependent memory","level":4,"score":0.30160000920295715},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.2939000129699707},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.28450000286102295},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27869999408721924},{"id":"https://openalex.org/C112049663","wikidata":"https://www.wikidata.org/wiki/Q18608","display_name":"Explicit memory","level":4,"score":0.27090001106262207},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.2632000148296356},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2556999921798706}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.24281","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.24281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.24281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.24281","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Transformers":[0,61,191],"have":[1],"been":[2],"established":[3],"as":[4],"the":[5,25,101,119,128,174,193,203],"de-facto":[6],"backbones":[7],"for":[8,30],"most":[9],"recent":[10,41],"advances":[11],"in":[12,54,62],"sequence":[13,110],"modeling,":[14,165],"mainly":[15],"due":[16],"to":[17,43,67,107],"their":[18,68,92,151],"growing":[19,129],"memory":[20,93,103,121,130,158],"capacity":[21,104],"that":[22,84,116,171,189],"scales":[23],"with":[24,109,205],"context":[26],"length.":[27],"While":[28],"plausible":[29],"retrieval":[31],"tasks,":[32,64],"it":[33],"causes":[34],"quadratic":[35],"complexity":[36],"and":[37,127,145,149,156,166,207],"so":[38],"has":[39],"motivated":[40],"studies":[42],"explore":[44],"viable":[45],"subquadratic":[46],"recurrent":[47,58,86,177,212],"alternatives.":[48],"Despite":[49],"showing":[50],"promising":[51],"preliminary":[52],"results":[53,162,183],"diverse":[55],"domains,":[56],"such":[57],"architectures":[59],"underperform":[60],"recall-intensive":[63],"often":[65],"attributed":[66],"fixed-size":[69],"memory.":[70],"In":[71],"this":[72],"paper,":[73],"we":[74],"introduce":[75],"Memory":[76,98],"Caching":[77,99],"(MC),":[78],"a":[79,113],"simple":[80],"yet":[81],"effective":[82,102],"technique":[83],"enhances":[85,173],"models":[87],"by":[88],"caching":[89],"checkpoints":[90],"of":[91,105,125,134,140,176,184],"states":[94],"(a.k.a.":[95],"hidden":[96],"states).":[97],"allows":[100],"RNNs":[106,126],"grow":[108],"length,":[111],"offering":[112],"flexible":[114],"trade-off":[115],"interpolates":[117],"between":[118],"fixed":[120],"(i.e.,":[122,131],"$O(L)$":[123],"complexity)":[124,133],"$O(L^2)$":[132],"Transformers.":[135],"We":[136],"propose":[137],"four":[138],"variants":[139,198],"MC,":[141],"including":[142],"gated":[143],"aggregation":[144],"sparse":[146],"selective":[147],"mechanisms,":[148],"discuss":[150],"implications":[152],"on":[153,163],"both":[154],"linear":[155],"deep":[157],"modules.":[159],"Our":[160],"experimental":[161],"language":[164],"long-context":[167],"understanding":[168],"tasks":[169,187],"show":[170,199],"MC":[172,197],"performance":[175],"models,":[178],"supporting":[179],"its":[180],"effectiveness.":[181],"The":[182],"in-context":[185],"recall":[186],"indicate":[188],"while":[190],"achieve":[192],"best":[194],"accuracy,":[195],"our":[196],"competitive":[200],"performance,":[201],"close":[202],"gap":[204],"Transformers,":[206],"performs":[208],"better":[209],"than":[210],"state-of-the-art":[211],"models.":[213]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-03T00:00:00"}
