{"id":"https://openalex.org/W7134891416","doi":"https://doi.org/10.1145/3779212.3790197","title":"Ouroboros: Wafer-Scale SRAM CIM with Token-Grained Pipelining for Large Language Model Inference","display_name":"Ouroboros: Wafer-Scale SRAM CIM with Token-Grained Pipelining for Large Language Model Inference","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7134891416","doi":"https://doi.org/10.1145/3779212.3790197"},"language":null,"primary_location":{"id":"doi:10.1145/3779212.3790197","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790197","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3779212.3790197","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yiqi Liu","orcid":"https://orcid.org/0009-0005-8717-068X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiqi Liu","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-8717-068X","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101320367","display_name":"Yudong Pan","orcid":"https://orcid.org/0009-0001-0012-4113"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yudong Pan","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-0012-4113","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100707463","display_name":"Mengdi Wang","orcid":"https://orcid.org/0000-0002-7012-2308"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengdi Wang","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7012-2308","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022007971","display_name":"Shixin Zhao","orcid":"https://orcid.org/0000-0002-5175-7025"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shixin Zhao","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5175-7025","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haonan Zhu","orcid":"https://orcid.org/0000-0002-5644-3105"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Zhu","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and Hangzhou Institute for Advanced Study, University of Chinese Academy of Sciences, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-5644-3105","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China and Hangzhou Institute for Advanced Study, University of Chinese Academy of Sciences, Hangzhou, China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128769582","display_name":"Yinhe Han","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinhe Han","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0904-6681","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128779849","display_name":"Lei Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Zhang","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9711-8758","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5128718825","display_name":"Ying Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wang","raw_affiliation_strings":["SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5172-4736","affiliations":[{"raw_affiliation_string":"SKLP, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40219287,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1349","last_page":"1365"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.19709999859333038,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.19709999859333038,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.13300000131130219,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.09769999980926514,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/static-random-access-memory","display_name":"Static random-access memory","score":0.6173999905586243},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5645999908447266},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5602999925613403},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.36419999599456787},{"id":"https://openalex.org/keywords/memory-architecture","display_name":"Memory architecture","score":0.3379000127315521},{"id":"https://openalex.org/keywords/memory-model","display_name":"Memory model","score":0.3276999890804291},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.3255000114440918},{"id":"https://openalex.org/keywords/random-access-memory","display_name":"Random access memory","score":0.31790000200271606}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8112000226974487},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.6173999905586243},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5645999908447266},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5602999925613403},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.40070000290870667},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.3379000127315521},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.3276999890804291},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.3255000114440918},{"id":"https://openalex.org/C2994168587","wikidata":"https://www.wikidata.org/wiki/Q5295","display_name":"Random access memory","level":2,"score":0.31790000200271606},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3174999952316284},{"id":"https://openalex.org/C41036726","wikidata":"https://www.wikidata.org/wiki/Q844824","display_name":"Physical address","level":3,"score":0.31209999322891235},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.3059999942779541},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.29589998722076416},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.28380000591278076},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.28209999203681946},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.27639999985694885},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2597000002861023},{"id":"https://openalex.org/C18131444","wikidata":"https://www.wikidata.org/wiki/Q163585","display_name":"Memory protection","level":5,"score":0.2590999901294708},{"id":"https://openalex.org/C153247305","wikidata":"https://www.wikidata.org/wiki/Q835713","display_name":"Memory address","level":3,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3779212.3790197","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790197","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3779212.3790197","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790197","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.8784953355789185}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W83093243","https://openalex.org/W1982945449","https://openalex.org/W2056999868","https://openalex.org/W2069948346","https://openalex.org/W2118231264","https://openalex.org/W2346205343","https://openalex.org/W2588191434","https://openalex.org/W3012493694","https://openalex.org/W3017024317","https://openalex.org/W3047766185","https://openalex.org/W3134274954","https://openalex.org/W3139521791","https://openalex.org/W3159727696","https://openalex.org/W3189877953","https://openalex.org/W3205192296","https://openalex.org/W3206453033","https://openalex.org/W4285121610","https://openalex.org/W4297097318","https://openalex.org/W4297097426","https://openalex.org/W4308083513","https://openalex.org/W4321636575","https://openalex.org/W4380874786","https://openalex.org/W4386396242","https://openalex.org/W4387321091","https://openalex.org/W4393578753","https://openalex.org/W4394998968","https://openalex.org/W4411486243","https://openalex.org/W4411486393","https://openalex.org/W4411486567","https://openalex.org/W4411644263"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"model":[2],"(LLM)":[3],"inference":[4],"demands":[5],"vast":[6],"memory":[7,11,30,75],"capacity":[8],"and":[9,20,58],"hierarchical":[10],"structures,":[12],"but":[13],"conventional":[14],"architectures":[15],"suffer":[16],"from":[17],"excessive":[18],"energy":[19,61],"latency":[21],"costs":[22],"due":[23,70],"to":[24,71],"frequent":[25],"data":[26,56],"movement":[27],"across":[28],"deep":[29],"tiers.":[31],"To":[32],"address":[33],"this,":[34],"we":[35],"propose":[36],"a":[37],"wafer-scale":[38,64],"SRAM-based":[39],"Computing-in-Memory":[40],"(CIM)":[41],"architecture":[42],"that":[43],"performs":[44],"all":[45],"LLM":[46],"operations":[47],"in":[48],"situ":[49],"within":[50],"the":[51,72],"first-level":[52,74],"SRAM,":[53],"eliminating":[54],"off-chip":[55],"migration":[57],"achieving":[59],"unprecedented":[60],"efficiency.":[62],"However,":[63],"SRAM":[65],"CIM":[66],"presents":[67],"multiple":[68],"challenges":[69],"limited":[73],"capacity,":[76],"which":[77],"requires":[78],"efficient":[79],"compute-memory":[80],"resource":[81],"allocation.":[82]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-03-12T00:00:00"}
