{"id":"https://openalex.org/W4409248684","doi":"https://doi.org/10.1109/hpca61900.2025.00047","title":"AsyncDIMM: Achieving Asynchronous Execution in DIMM-Based Near-Memory Processing","display_name":"AsyncDIMM: Achieving Asynchronous Execution in DIMM-Based Near-Memory Processing","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248684","doi":"https://doi.org/10.1109/hpca61900.2025.00047"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00047","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073749486","display_name":"Liyan Chen","orcid":"https://orcid.org/0000-0001-9693-5377"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liyan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Micro/Nano Electronics"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Micro/Nano Electronics","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066692868","display_name":"Dongxu Lyu","orcid":"https://orcid.org/0000-0001-6826-2670"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongxu Lyu","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Micro/Nano Electronics"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Micro/Nano Electronics","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008057183","display_name":"Jianfei Jiang","orcid":"https://orcid.org/0000-0002-5521-6197"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfei Jiang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Micro/Nano Electronics"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Micro/Nano Electronics","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063554298","display_name":"Qin Wang","orcid":"https://orcid.org/0000-0002-6559-5207"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Micro/Nano Electronics"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Micro/Nano Electronics","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103236320","display_name":"Zhigang Mao","orcid":"https://orcid.org/0000-0001-9431-9853"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhigang Mao","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Micro/Nano Electronics"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Micro/Nano Electronics","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045693138","display_name":"Naifeng Jing","orcid":"https://orcid.org/0000-0001-8417-5796"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Naifeng Jing","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Micro/Nano Electronics"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Micro/Nano Electronics","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5073749486"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":5.1163,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93721449,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"518","last_page":"532"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10472","display_name":"Semiconductor materials and devices","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8155031204223633},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.6608721017837524},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.47244197130203247},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.41508084535598755},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3671814203262329},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3412685990333557},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2948691248893738},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.23455986380577087},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.11749982833862305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8155031204223633},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.6608721017837524},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.47244197130203247},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.41508084535598755},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3671814203262329},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3412685990333557},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2948691248893738},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.23455986380577087},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.11749982833862305}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00047","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W1981220134","https://openalex.org/W1981943579","https://openalex.org/W1993488239","https://openalex.org/W2019959197","https://openalex.org/W2036853599","https://openalex.org/W2045431893","https://openalex.org/W2047684617","https://openalex.org/W2123119012","https://openalex.org/W2129513794","https://openalex.org/W2129991978","https://openalex.org/W2145451976","https://openalex.org/W2147657366","https://openalex.org/W2149381887","https://openalex.org/W2159908132","https://openalex.org/W2162838417","https://openalex.org/W2166263440","https://openalex.org/W2474451066","https://openalex.org/W2532349170","https://openalex.org/W2535323484","https://openalex.org/W2622164283","https://openalex.org/W2762749859","https://openalex.org/W2794945502","https://openalex.org/W2795118915","https://openalex.org/W2904929935","https://openalex.org/W2973214126","https://openalex.org/W2979719709","https://openalex.org/W2979823675","https://openalex.org/W2979858238","https://openalex.org/W2982008795","https://openalex.org/W2983532630","https://openalex.org/W3006586535","https://openalex.org/W3033124768","https://openalex.org/W3043023836","https://openalex.org/W3043140114","https://openalex.org/W3100710793","https://openalex.org/W3157531038","https://openalex.org/W3158634533","https://openalex.org/W3191222816","https://openalex.org/W3192841685","https://openalex.org/W3205200804","https://openalex.org/W3205265826","https://openalex.org/W3206328251","https://openalex.org/W3207399097","https://openalex.org/W3210580311","https://openalex.org/W4200394007","https://openalex.org/W4230161130","https://openalex.org/W4232168013","https://openalex.org/W4234876175","https://openalex.org/W4239659666","https://openalex.org/W4249322926","https://openalex.org/W4281644153","https://openalex.org/W4281685651","https://openalex.org/W4285121610","https://openalex.org/W4285504044","https://openalex.org/W4297097348","https://openalex.org/W4308083747","https://openalex.org/W4318328413","https://openalex.org/W4360831749","https://openalex.org/W4380875717","https://openalex.org/W4380881178","https://openalex.org/W4384824875","https://openalex.org/W4386763946","https://openalex.org/W4386765002","https://openalex.org/W4392427708","https://openalex.org/W4393141021","https://openalex.org/W4394998968","https://openalex.org/W4401211456","https://openalex.org/W4401211602","https://openalex.org/W6605099352","https://openalex.org/W6674147819","https://openalex.org/W6686058071","https://openalex.org/W6719768283","https://openalex.org/W6759932407","https://openalex.org/W6779824479","https://openalex.org/W6789513154","https://openalex.org/W6858980326"],"related_works":["https://openalex.org/W2116677773","https://openalex.org/W2155261584","https://openalex.org/W2584231425","https://openalex.org/W2150611273","https://openalex.org/W4207086172","https://openalex.org/W2042919702","https://openalex.org/W4225981436","https://openalex.org/W2156185805","https://openalex.org/W2770353918","https://openalex.org/W2122502560"],"abstract_inverted_index":{"DIMM-based":[0],"near-memory":[1,12],"processing":[2],"(NMP)":[3],"architectures":[4],"address":[5],"the":[6,46,50,90,93,136,206],"\u201cmemory":[7],"wall\u201d":[8],"problem":[9],"by":[10,217],"incorporating":[11],"accelerators":[13],"(NMAs)":[14],"into":[15],"main":[16],"memory":[17,21,54,60,66,83,114,146,151],"devices":[18],"for":[19,149,168,174,183],"high":[20],"bandwidth":[22,157,230],"and":[23,36,49,69,127,141,159,176,198,233],"low":[24],"energy":[25],"consumption.":[26],"However,":[27,85],"critical":[28],"challenges":[29],"prevent":[30,81],"efficient":[31,108,222],"asynchronous":[32,109,223],"execution":[33,79,110,224],"between":[34,131],"host":[35,47,91,197],"NMAs":[37],"in":[38],"DIMM-NMP":[39,75,105],"architectures.":[40],"Memory":[41],"controllers":[42],"(MCs)":[43],"distributed":[44],"at":[45],"side":[48,52],"NMA":[51,94,199],"issue":[53],"accesses":[55],"independently":[56],"without":[57],"synchronization":[58,143],"on":[59,112,205],"states,":[61],"which":[62,201],"may":[63],"lead":[64],"to":[65,80,144,154,195,227],"bus":[67],"contention":[68],"DRAM":[70],"errors.":[71],"Therefore,":[72],"most":[73],"existing":[74,113],"designs":[76],"adopt":[77],"synchronous":[78],"concurrent":[82,124],"accesses.":[84],"this":[86,98],"intervention":[87],"wastes":[88],"either":[89],"or":[92,122],"computation":[95],"capability.":[96],"In":[97],"work,":[99],"we":[100,134,163,186],"propose":[101,135],"AsyncDIMM,":[102],"a":[103,128,170,188],"novel":[104],"design":[106,190],"with":[107,139,179,191,225],"based":[111],"buses.":[115],"It":[116],"enables":[117],"single":[118],"access":[119,125,147,161,235],"mode":[120],"(host":[121],"NMA),":[123],"mode,":[126],"seamless":[129],"switch":[130],"them.":[132],"First,":[133],"offload-schedule-return":[137],"mechanism":[138],"explicit":[140],"implicit":[142],"ensure":[145],"correctness":[148],"all":[150],"modes.":[152],"Second,":[153],"further":[155],"improve":[156],"utilization":[158,231],"decrease":[160],"latency,":[162],"introduce":[164],"optimized":[165],"timing":[166],"constraints":[167],"offloading,":[169],"locality-aware":[171],"switch-recovery":[172],"method":[173],"scheduling,":[175],"adaptive":[177],"batch":[178],"timing-division":[180],"multiplexing":[181],"notification":[182],"returning.":[184],"Finally,":[185],"present":[187],"detailed":[189],"limited":[192],"hardware":[193],"modifications":[194],"conventional":[196],"MCs,":[200],"is":[202],"extensively":[203],"validated":[204],"FPGA.":[207],"Comprehensive":[208],"experiments":[209],"demonstrate":[210],"that":[211],"AsyncDIMM":[212],"outperforms":[213],"four":[214],"NMP":[215],"baselines":[216],"$1.19":[218],"\\times-1.92":[219],"\\times$,":[220],"enabling":[221],"up":[226],"$2.25":[228],"\\times$":[229],"uplift":[232],"47%":[234],"latency":[236],"reduction.":[237]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
