{"id":"https://openalex.org/W4411652463","doi":"https://doi.org/10.1145/3711821","title":"HopScotch: A Holistic Approach to Data Layout-Aware Mapping on NPUs for High-Performance DNN Inference","display_name":"HopScotch: A Holistic Approach to Data Layout-Aware Mapping on NPUs for High-Performance DNN Inference","publication_year":2025,"publication_date":"2025-06-25","ids":{"openalex":"https://openalex.org/W4411652463","doi":"https://doi.org/10.1145/3711821"},"language":"en","primary_location":{"id":"doi:10.1145/3711821","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711821","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1145/3711821","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Suhong Lee","orcid":"https://orcid.org/0009-0001-3639-9223"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Suhong Lee","raw_affiliation_strings":["Electrical and Computer Engineering, Seoul National University","Electrical and Computer Engineering, Seoul National University, Gwanak-gu, Korea (the Republic of)"],"raw_orcid":"https://orcid.org/0009-0001-3639-9223","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Seoul National University","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Electrical and Computer Engineering, Seoul National University, Gwanak-gu, Korea (the Republic of)","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018573573","display_name":"Boyeal Kim","orcid":"https://orcid.org/0000-0003-4855-3600"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Boyeal Kim","raw_affiliation_strings":["Seoul National University","Seoul National University, Gwanak-gu, Korea (the Republic of)"],"raw_orcid":"https://orcid.org/0000-0003-4855-3600","affiliations":[{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Seoul National University, Gwanak-gu, Korea (the Republic of)","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029441279","display_name":"Yongseok Choi","orcid":"https://orcid.org/0009-0006-6512-0291"},"institutions":[{"id":"https://openalex.org/I2800747041","display_name":"Korea Aerospace Research Institute","ror":"https://ror.org/037pqnq23","country_code":"KR","type":"government","lineage":["https://openalex.org/I2800747041","https://openalex.org/I2801339556","https://openalex.org/I4387152098","https://openalex.org/I4405260336"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yongseok Choi","raw_affiliation_strings":["SAPEON Korea","SAPEON Korea, Korea (the Republic of)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SAPEON Korea","institution_ids":[]},{"raw_affiliation_string":"SAPEON Korea, Korea (the Republic of)","institution_ids":["https://openalex.org/I2800747041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024723558","display_name":"Hyuk\u2010Jae Lee","orcid":"https://orcid.org/0000-0001-6811-9647"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyuk-Jae Lee","raw_affiliation_strings":["Electrical and Computer Engineering, Seoul National University","Electrical and Computer Engineering, Seoul National University, Gwanak-gu, Korea (the Republic of)"],"raw_orcid":"https://orcid.org/0000-0001-6811-9647","affiliations":[{"raw_affiliation_string":"Electrical and Computer Engineering, Seoul National University","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Electrical and Computer Engineering, Seoul National University, Gwanak-gu, Korea (the Republic of)","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9349,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.75866086,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"22","issue":"3","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10444","display_name":"Context-Aware Activity Recognition Systems","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8299339413642883},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.741537868976593},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4057731628417969},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.405488520860672},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3248741030693054}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8299339413642883},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.741537868976593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4057731628417969},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.405488520860672},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3248741030693054}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3711821","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711821","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3711821","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3711821","pdf_url":null,"source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1501077214","https://openalex.org/W2194775991","https://openalex.org/W2618530766","https://openalex.org/W2914209329","https://openalex.org/W2935524202","https://openalex.org/W2936278485","https://openalex.org/W2940862705","https://openalex.org/W2945146780","https://openalex.org/W2963163009","https://openalex.org/W2963918968","https://openalex.org/W2979691890","https://openalex.org/W2980200167","https://openalex.org/W3006732000","https://openalex.org/W3007788310","https://openalex.org/W3097528158","https://openalex.org/W3102175148","https://openalex.org/W3116917247","https://openalex.org/W3127736057","https://openalex.org/W3167976421","https://openalex.org/W3184376546","https://openalex.org/W3190062760","https://openalex.org/W3190092209","https://openalex.org/W4200566604","https://openalex.org/W4205983429","https://openalex.org/W4214870144","https://openalex.org/W4247470470","https://openalex.org/W4280543907","https://openalex.org/W4281784760","https://openalex.org/W4285335127","https://openalex.org/W4291910419","https://openalex.org/W4295312788","https://openalex.org/W4308083911","https://openalex.org/W4312443924","https://openalex.org/W4327930469","https://openalex.org/W4380881154","https://openalex.org/W4389104604","https://openalex.org/W4389722504","https://openalex.org/W4392265934","https://openalex.org/W4400188494","https://openalex.org/W4401212157","https://openalex.org/W4409058197","https://openalex.org/W4410553041"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Modern":[0],"deep":[1],"neural":[2,37],"networks":[3],"(DNNs)":[4],"are":[5],"widely":[6],"utilized":[7],"across":[8],"a":[9,50,62,71,108,127,141,173,179,191,198,204],"broad":[10],"range":[11],"of":[12,20,47,98,134,181,212,267],"domains,":[13],"scaling":[14],"rapidly":[15],"and":[16,26,148,241,262,269,277],"often":[17],"comprising":[18],"hundreds":[19,97],"diverse":[21,99],"layers":[22,48,100],"with":[23,70,87,156,247],"varying":[24],"types":[25],"configurations.":[27],"To":[28,119],"accelerate":[29],"DNN":[30,51,115,249],"execution,":[31],"specialized":[32],"hardware":[33],"solutions,":[34],"known":[35],"as":[36],"processing":[38],"units":[39],"(NPUs),":[40],"have":[41],"been":[42],"developed.":[43],"However,":[44],"this":[45,103,121,123],"heterogeneity":[46],"in":[49,79,101,265,271],"model":[52],"may":[53,82],"cause":[54],"performance":[55],"degradation":[56],"on":[57,117,136,203],"NPUs.":[58,118,137],"For":[59],"example,":[60],"while":[61],"layer\u2019s":[63],"execution":[64],"or":[65],"dataflow":[66],"is":[67,227],"generally":[68],"associated":[69],"specific":[72],"data":[73,77,131,165,175],"access":[74],"order,":[75],"the":[76,96,145,149,185,209,213,222,230,237,243],"layout":[78,93,104,166,176,193,215,224,238,255],"on-chip":[80,146,158],"memory":[81,147],"not":[83],"be":[84],"well":[85],"aligned":[86],"it,":[88],"introducing":[89],"bubble":[90],"cycles":[91],"for":[92,111],"reordering.":[94],"Given":[95],"DNNs,":[102],"reordering":[105,167,256],"overhead":[106],"presents":[107,190],"new":[109],"challenge":[110],"achieving":[112],"efficient":[113,210],"end-to-end":[114,272],"inference":[116],"address":[120],"problem,":[122],"article":[124],"introduces":[125,172],"HopScotch,":[126],"holistic":[128],"approach":[129],"to":[130,162,177,235,260,275],"layout-aware":[132],"mapping":[133,194,216,225,232,239],"DNNs":[135],"First,":[138],"HopScotch":[139,189,231,253],"adopts":[140],"routing":[142],"interconnect":[143],"between":[144],"systolic":[150],"array":[151],"utilizing":[152],"three-input":[153],"multiplexers,":[154],"paired":[155],"an":[157],"programmable":[159],"vector":[160],"processor":[161],"manage":[163],"arbitrary":[164],"at":[168,218],"runtime.":[169],"Additionally,":[170],"it":[171],"tailored":[174],"accommodate":[178],"variety":[180],"convolutional":[182],"configurations":[183],"within":[184],"proposed":[186,223],"microarchitecture.":[187],"Second,":[188],"novel":[192],"solver":[195,226],"that":[196,252],"employs":[197],"top-k":[199],"selection":[200],"strategy":[201],"based":[202],"beam":[205],"search":[206],"algorithm,":[207],"facilitating":[208],"exploration":[211],"vast":[214],"space":[217,240],"compile":[219],"time.":[220],"Third,":[221],"integrated":[228],"into":[229],"framework":[233],"(HMF)":[234],"explore":[236],"evaluate":[242],"resulting":[244,264],"performance.":[245],"Experiments":[246],"popular":[248],"models":[250],"show":[251],"reduces":[254],"costs":[257],"by":[258],"up":[259],"98.2%":[261],"90.3%,":[263],"speedups":[266],"2.62\u00d7":[268],"1.64\u00d7":[270],"latency,":[273],"compared":[274],"XLA":[276],"GCD":[278],"2":[279],",":[280],"respectively.":[281]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
