{"id":"https://openalex.org/W4414974080","doi":"https://doi.org/10.1145/3725843.3756043","title":"Stratum: System-Hardware Co-Design with Tiered Monolithic 3D-Stackable DRAM for Efficient MoE Serving","display_name":"Stratum: System-Hardware Co-Design with Tiered Monolithic 3D-Stackable DRAM for Efficient MoE Serving","publication_year":2025,"publication_date":"2025-10-17","ids":{"openalex":"https://openalex.org/W4414974080","doi":"https://doi.org/10.1145/3725843.3756043"},"language":"en","primary_location":{"id":"doi:10.1145/3725843.3756043","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3725843.3756043","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2510.05245","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yue Pan","orcid":"https://orcid.org/0009-0001-6358-0394"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yue Pan","raw_affiliation_strings":["University of California, San Diego, La Jolla, USA"],"raw_orcid":"https://orcid.org/0009-0001-6358-0394","affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zihan Xia","orcid":"https://orcid.org/0000-0002-5409-321X"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zihan Xia","raw_affiliation_strings":["University of California, San Diego, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0002-5409-321X","affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038425923","display_name":"Po-Kai Hsu","orcid":"https://orcid.org/0000-0002-7518-9472"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I2800444561","display_name":"Atlanta Technical College","ror":"https://ror.org/01s3vfp47","country_code":"US","type":"education","lineage":["https://openalex.org/I2800444561"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Po-Kai Hsu","raw_affiliation_strings":["Georgia Tech, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-7518-9472","affiliations":[{"raw_affiliation_string":"Georgia Tech, Atlanta, USA","institution_ids":["https://openalex.org/I2800444561","https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111055876","display_name":"Lanxiang Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lanxiang Hu","raw_affiliation_strings":["University of California, San Diego, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0003-0641-3677","affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050020235","display_name":"Hyungyo Kim","orcid":"https://orcid.org/0000-0002-1157-0127"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hyungyo Kim","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, Urbana, USA"],"raw_orcid":"https://orcid.org/0000-0002-1157-0127","affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, Urbana, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073942394","display_name":"Janak Sharda","orcid":"https://orcid.org/0000-0002-1438-2439"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I2800444561","display_name":"Atlanta Technical College","ror":"https://ror.org/01s3vfp47","country_code":"US","type":"education","lineage":["https://openalex.org/I2800444561"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Janak Sharda","raw_affiliation_strings":["Georgia Tech, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-1438-2439","affiliations":[{"raw_affiliation_string":"Georgia Tech, Atlanta, USA","institution_ids":["https://openalex.org/I2800444561","https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036778557","display_name":"Minxuan Zhou","orcid":"https://orcid.org/0000-0002-5523-7270"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Minxuan Zhou","raw_affiliation_strings":["Illinois Institute of Technology, Chicago, USA"],"raw_orcid":"https://orcid.org/0000-0002-5523-7270","affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, Chicago, USA","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nam Sung Kim","orcid":"https://orcid.org/0009-0007-5700-8846"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nam Sung Kim","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, Urbana, USA"],"raw_orcid":"https://orcid.org/0009-0007-5700-8846","affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, Urbana, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054894631","display_name":"Shimeng Yu","orcid":"https://orcid.org/0000-0002-0068-3652"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I2800444561","display_name":"Atlanta Technical College","ror":"https://ror.org/01s3vfp47","country_code":"US","type":"education","lineage":["https://openalex.org/I2800444561"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shimeng Yu","raw_affiliation_strings":["Georgia Tech, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-0068-3652","affiliations":[{"raw_affiliation_string":"Georgia Tech, Atlanta, USA","institution_ids":["https://openalex.org/I2800444561","https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112841571","display_name":"Tajana Rosing","orcid":null},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tajana Rosing","raw_affiliation_strings":["University of California, San Diego, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0002-6954-997X","affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006712350","display_name":"Mingu Kang","orcid":"https://orcid.org/0000-0001-8104-5136"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingu Kang","raw_affiliation_strings":["University of California, San Diego, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0001-8104-5136","affiliations":[{"raw_affiliation_string":"University of California, San Diego, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":0.6531,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75392959,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9573000073432922,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.9107999801635742},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.39809998869895935},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.375},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.36660000681877136},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.3424000144004822},{"id":"https://openalex.org/keywords/interconnection","display_name":"Interconnection","score":0.34220001101493835},{"id":"https://openalex.org/keywords/cas-latency","display_name":"CAS latency","score":0.3407999873161316},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.3278999924659729},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.3244999945163727}],"concepts":[{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.9107999801635742},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7418000102043152},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.515500009059906},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.47780001163482666},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.39809998869895935},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.375},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.36660000681877136},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.34220001101493835},{"id":"https://openalex.org/C189930140","wikidata":"https://www.wikidata.org/wiki/Q1112878","display_name":"CAS latency","level":4,"score":0.3407999873161316},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3278999924659729},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3244999945163727},{"id":"https://openalex.org/C530198007","wikidata":"https://www.wikidata.org/wiki/Q80831","display_name":"Integrated circuit","level":2,"score":0.3206999897956848},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C190560348","wikidata":"https://www.wikidata.org/wiki/Q3245116","display_name":"Circuit design","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3140000104904175},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.31279999017715454},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.29269999265670776},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.2827000021934509},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2827000021934509},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C195053848","wikidata":"https://www.wikidata.org/wiki/Q7894141","display_name":"Universal memory","level":5,"score":0.27959999442100525},{"id":"https://openalex.org/C118702147","wikidata":"https://www.wikidata.org/wiki/Q189396","display_name":"Dynamic random-access memory","level":3,"score":0.272599995136261},{"id":"https://openalex.org/C118021083","wikidata":"https://www.wikidata.org/wiki/Q610398","display_name":"System on a chip","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.266400009393692},{"id":"https://openalex.org/C74524168","wikidata":"https://www.wikidata.org/wiki/Q1074539","display_name":"Integrated circuit design","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.2531999945640564},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3725843.3756043","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3725843.3756043","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2510.05245","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.05245","pdf_url":"https://arxiv.org/pdf/2510.05245","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2510.05245","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2510.05245","pdf_url":"https://arxiv.org/pdf/2510.05245","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1967136720","https://openalex.org/W2041811089","https://openalex.org/W2346205343","https://openalex.org/W2514307950","https://openalex.org/W2588191434","https://openalex.org/W2752340955","https://openalex.org/W2761132374","https://openalex.org/W2765234579","https://openalex.org/W2963123047","https://openalex.org/W3017024317","https://openalex.org/W3100710793","https://openalex.org/W3130554079","https://openalex.org/W3136947505","https://openalex.org/W3154621712","https://openalex.org/W3174762441","https://openalex.org/W3189166979","https://openalex.org/W3210939626","https://openalex.org/W3213412675","https://openalex.org/W4220702013","https://openalex.org/W4233996382","https://openalex.org/W4253995697","https://openalex.org/W4280496502","https://openalex.org/W4283704898","https://openalex.org/W4297097375","https://openalex.org/W4317792959","https://openalex.org/W4324292875","https://openalex.org/W4380302164","https://openalex.org/W4385192512","https://openalex.org/W4387321091","https://openalex.org/W4391622607","https://openalex.org/W4392007377","https://openalex.org/W4392427708","https://openalex.org/W4392716316","https://openalex.org/W4392745968","https://openalex.org/W4393168980","https://openalex.org/W4394998968","https://openalex.org/W4401211878","https://openalex.org/W4401881675","https://openalex.org/W4405755183","https://openalex.org/W4406094561","https://openalex.org/W4407638835","https://openalex.org/W4407692875","https://openalex.org/W4408183447"],"related_works":[],"abstract_inverted_index":{"As":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"continue":[5],"to":[6,34,51,65,139,194,203,218],"evolve,":[7],"Mixture":[8],"of":[9,27,39,79,153,168],"Experts":[10],"(MoE)":[11],"architecture":[12],"has":[13],"emerged":[14],"as":[15],"a":[16,24,37,86],"prevailing":[17],"design":[18],"for":[19,61],"achieving":[20,44],"state-of-the-art":[21],"performance":[22],"across":[23,182,214],"wide":[25],"range":[26],"tasks.":[28],"MoE":[29,73,81],"models":[30,57],"use":[31],"sparse":[32],"gating":[33],"activate":[35],"only":[36],"handful":[38],"expert":[40,191],"sub-networks":[41],"per":[42],"input,":[43],"billion-parameter":[45],"capacity":[46],"with":[47],"inference":[48],"costs":[49],"akin":[50],"much":[52],"smaller":[53],"models.":[54],"However,":[55],"such":[56],"often":[58],"pose":[59],"challenges":[60,78],"hardware":[62],"deployment":[63],"due":[64],"the":[66,72,77,92,119,140,160,172],"massive":[67],"data":[68,181],"volume":[69],"introduced":[70,163],"by":[71,146,164,174,189],"layers.":[74],"To":[75],"address":[76],"serving":[80],"models,":[82],"we":[83,158],"propose":[84],"Stratum,":[85],"system-hardware":[87],"co-design":[88],"approach":[89],"that":[90],"combines":[91],"novel":[93],"memory":[94,177],"technology":[95],"Monolithic":[96],"3D-Stackable":[97],"DRAM":[98,111,121,131,170],"(Mono3D":[99],"DRAM),":[100],"near-memory":[101,155],"processing":[102],"(NMP),":[103],"and":[104,109,123,179,209],"GPU":[105,124,219],"acceleration.":[106],"The":[107,198],"logic":[108],"Mono3D":[110,120,130,169],"dies":[112],"are":[113,125],"connected":[114],"through":[115],"hybrid":[116],"bonding,":[117],"whereas":[118],"stack":[122],"interconnected":[126],"via":[127],"silicon":[128],"interposer.":[129],"offers":[132],"higher":[133],"internal":[134,176],"bandwidth":[135],"than":[136],"HBM":[137],"thanks":[138],"dense":[141],"vertical":[142,166],"interconnect":[143],"pitch":[144],"enabled":[145],"its":[147],"monolithic":[148],"structure,":[149],"which":[150],"supports":[151],"implementations":[152],"higher-performance":[154],"processing.":[156],"Furthermore,":[157],"tackle":[159],"latency":[161],"differences":[162],"aggressive":[165],"scaling":[167],"along":[171],"z-dimension":[173],"constructing":[175],"tiers":[178],"assigning":[180],"layers":[183],"based":[184],"on":[185],"access":[186],"likelihood,":[187],"guided":[188],"topic-based":[190],"usage":[192],"prediction":[193],"boost":[195],"NMP":[196],"throughput.":[197],"Stratum":[199],"system":[200],"achieves":[201],"up":[202],"8.29x":[204],"improvement":[205],"in":[206],"decoding":[207],"throughput":[208],"7.66x":[210],"better":[211],"energy":[212],"efficiency":[213],"various":[215],"benchmarks":[216],"compared":[217],"baselines.":[220]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
