{"id":"https://openalex.org/W7126177335","doi":"https://doi.org/10.1145/3767110.3767133","title":"Scalable Analytical Memory Modeling of AI Accelerators","display_name":"Scalable Analytical Memory Modeling of AI Accelerators","publication_year":2025,"publication_date":"2025-10-06","ids":{"openalex":"https://openalex.org/W7126177335","doi":"https://doi.org/10.1145/3767110.3767133"},"language":null,"primary_location":{"id":"doi:10.1145/3767110.3767133","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767110.3767133","pdf_url":null,"source":{"id":"https://openalex.org/S4306524191","display_name":"Proceedings of the International Symposium on Memory Systems","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Symposium on Memory Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3767110.3767133","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124413318","display_name":"Md Azahar Alam","orcid":null},"institutions":[{"id":"https://openalex.org/I90871651","display_name":"University of Toledo","ror":"https://ror.org/01pbdzh19","country_code":"US","type":"education","lineage":["https://openalex.org/I90871651"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Azahar Alam","raw_affiliation_strings":["Computer Science and Engineering, The University of Toledo, Toledo, OH, USA"],"raw_orcid":"https://orcid.org/0009-0004-2072-7060","affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The University of Toledo, Toledo, OH, USA","institution_ids":["https://openalex.org/I90871651"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nandakishore Santhi","orcid":"https://orcid.org/0000-0002-4755-7821"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nandakishore Santhi","raw_affiliation_strings":["Los Alamos National Laboratory, Los Alamos, NM, USA"],"raw_orcid":"https://orcid.org/0000-0002-4755-7821","affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM, USA","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124387078","display_name":"Kishwar Ahmed","orcid":null},"institutions":[{"id":"https://openalex.org/I90871651","display_name":"University of Toledo","ror":"https://ror.org/01pbdzh19","country_code":"US","type":"education","lineage":["https://openalex.org/I90871651"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kishwar Ahmed","raw_affiliation_strings":["Computer Science and Engineering, The University of Toledo, Toledo, OH, USA"],"raw_orcid":"https://orcid.org/0000-0001-6295-3569","affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, The University of Toledo, Toledo, OH, USA","institution_ids":["https://openalex.org/I90871651"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.63305986,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"197","last_page":"206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.450300008058548,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.450300008058548,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.17919999361038208,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.10559999942779541,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.8899000287055969},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7282999753952026},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5989000201225281},{"id":"https://openalex.org/keywords/dataflow-architecture","display_name":"Dataflow architecture","score":0.5659000277519226},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5005000233650208},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.48669999837875366},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4366999864578247},{"id":"https://openalex.org/keywords/memory-model","display_name":"Memory model","score":0.42100000381469727},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4059000015258789}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.8899000287055969},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8148000240325928},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7282999753952026},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5989000201225281},{"id":"https://openalex.org/C176727019","wikidata":"https://www.wikidata.org/wiki/Q1172415","display_name":"Dataflow architecture","level":3,"score":0.5659000277519226},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5142999887466431},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5005000233650208},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.48669999837875366},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4366999864578247},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.43220001459121704},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.42100000381469727},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4059000015258789},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3894999921321869},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.36079999804496765},{"id":"https://openalex.org/C123593499","wikidata":"https://www.wikidata.org/wiki/Q6008583","display_name":"In-Memory Processing","level":5,"score":0.3416000008583069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3190999925136566},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.31470000743865967},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.30219998955726624},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2752000093460083},{"id":"https://openalex.org/C2781357197","wikidata":"https://www.wikidata.org/wiki/Q5757597","display_name":"High memory","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.2515999972820282},{"id":"https://openalex.org/C53619493","wikidata":"https://www.wikidata.org/wiki/Q4787093","display_name":"Architecture framework","level":3,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3767110.3767133","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767110.3767133","pdf_url":null,"source":{"id":"https://openalex.org/S4306524191","display_name":"Proceedings of the International Symposium on Memory Systems","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Symposium on Memory Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3767110.3767133","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3767110.3767133","pdf_url":null,"source":{"id":"https://openalex.org/S4306524191","display_name":"Proceedings of the International Symposium on Memory Systems","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Symposium on Memory Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4007578194141388,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2020035025","https://openalex.org/W2024717934","https://openalex.org/W2034861439","https://openalex.org/W2048266589","https://openalex.org/W2147657366","https://openalex.org/W2152839228","https://openalex.org/W2162639668","https://openalex.org/W2164705534","https://openalex.org/W2883084140","https://openalex.org/W2903738101","https://openalex.org/W2940862705","https://openalex.org/W2953212265","https://openalex.org/W2980200167","https://openalex.org/W2998732502","https://openalex.org/W2998901775","https://openalex.org/W3024621361","https://openalex.org/W3039972296","https://openalex.org/W3097528158","https://openalex.org/W3175768685","https://openalex.org/W3210604792","https://openalex.org/W4210439397","https://openalex.org/W4221140087","https://openalex.org/W4241057782","https://openalex.org/W4280635517","https://openalex.org/W4285335127","https://openalex.org/W4288076154","https://openalex.org/W4304202640","https://openalex.org/W4308480476","https://openalex.org/W4312968147","https://openalex.org/W4380874786","https://openalex.org/W4394999049","https://openalex.org/W4404954859"],"related_works":[],"abstract_inverted_index":{"Machine":[0],"learning":[1],"(ML)":[2],"workloads":[3,89],"(e.g.,":[4,38],"deep":[5],"neural":[6,10],"networks":[7,11],"(DNNs),":[8],"convolutional":[9],"(CNNs))":[12],"have":[13],"grown":[14],"significantly":[15],"in":[16,23,34,54],"size":[17],"and":[18,51,105,109,118,132,154,163,177,189,197],"complexity.":[19],"With":[20],"such":[21,123],"growth":[22],"ML":[24,42,88],"workloads,":[25],"memory":[26,61,75,121,195],"performance":[27],"has":[28],"emerged":[29],"as":[30,124],"a":[31,72,158],"critical":[32],"bottleneck":[33],"specialized":[35],"hardware":[36],"accelerators":[37,46,144],"AI":[39,45,91,167],"accelerators)":[40],"for":[41,87],"workloads.":[43],"The":[44,134],"feature":[47],"tightly-coupled":[48],"on-chip":[49,194],"SRAM":[50,125],"compute":[52],"units":[53],"systolic":[55],"array":[56],"architectures":[57],"that":[58,78,184],"demand":[59],"high":[60],"reuse":[62,80,97,107,171],"efficiency":[63,196],"to":[64,83,192],"maintain":[65],"throughput.":[66],"In":[67],"this":[68],"paper,":[69],"we":[70,116],"present":[71],"comprehensive":[73],"analytical":[74],"modeling":[76],"framework":[77],"leverages":[79],"distance":[81,98],"analysis":[82,99,141],"predict":[84],"SRAM-level":[85],"behavior":[86,172],"on":[90,166],"accelerators.":[92],"Our":[93,179],"model":[94,150],"extends":[95],"traditional":[96],"by":[100,143,174],"incorporating":[101],"dataflow":[102,175,190],"semantics,":[103],"spatial":[104],"temporal":[106],"dynamics,":[108],"architectural":[110,182],"buffer":[111,187],"constraints.":[112],"Using":[113],"our":[114,149],"model,":[115],"measure":[117],"report":[119],"key":[120],"metrics":[122,135],"hit":[126],"rate,":[127,129],"miss":[128],"bandwidth":[130],"utilization,":[131],"latency.":[133],"are":[136],"also":[137],"derived":[138],"from":[139],"trace-level":[140],"generated":[142],"like":[145],"ScaleSim.":[146],"We":[147],"evaluate":[148],"across":[151,157],"input-stationary,":[152],"weight-stationary,":[153],"output-stationary":[155],"mappings":[156],"broad":[159],"range":[160],"of":[161],"DNN":[162],"CNN":[164],"benchmarks":[165],"accelerator,":[168],"showing":[169],"how":[170],"varies":[173],"architecture":[176],"applications.":[178],"results":[180],"offer":[181],"insights":[183],"inform":[185],"efficient":[186],"sizing":[188],"configuration":[191],"maximize":[193],"reduce":[198],"off-chip":[199],"traffic.":[200]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-01T00:00:00"}
