{"id":"https://openalex.org/W4411486116","doi":"https://doi.org/10.1145/3695053.3731002","title":"Topology-Aware Virtualization over Inter-Core Connected Neural Processing Units","display_name":"Topology-Aware Virtualization over Inter-Core Connected Neural Processing Units","publication_year":2025,"publication_date":"2025-06-20","ids":{"openalex":"https://openalex.org/W4411486116","doi":"https://doi.org/10.1145/3695053.3731002"},"language":"en","primary_location":{"id":"doi:10.1145/3695053.3731002","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731002","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731002","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731002","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018025008","display_name":"Dahu Feng","orcid":"https://orcid.org/0000-0002-1146-8552"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dahu Feng","raw_affiliation_strings":["Tsinghua university, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua university, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051943243","display_name":"Erhu Feng","orcid":"https://orcid.org/0009-0006-5957-3024"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Erhu Feng","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102829139","display_name":"Dong Du","orcid":"https://orcid.org/0000-0002-7945-8430"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dong Du","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108745216","display_name":"Pinjie Xu","orcid":"https://orcid.org/0000-0003-0882-6520"},"institutions":[{"id":"https://openalex.org/I4210128910","display_name":"Group Sense (China)","ror":"https://ror.org/036wd5777","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210128910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pinjie Xu","raw_affiliation_strings":["SenseTime Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"SenseTime Research, Beijing, China","institution_ids":["https://openalex.org/I4210128910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026023746","display_name":"Yubin Xia","orcid":"https://orcid.org/0000-0001-6558-5298"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yubin Xia","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100406215","display_name":"Haibo Chen","orcid":"https://orcid.org/0000-0002-9720-0361"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibo Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100675943","display_name":"Rong Zhao","orcid":"https://orcid.org/0000-0002-2320-0326"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rong Zhao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5018025008"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.7467,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.73834062,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1210","last_page":"1224"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7293477654457092},{"id":"https://openalex.org/keywords/virtualization","display_name":"Virtualization","score":0.6733099818229675},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.5213662385940552},{"id":"https://openalex.org/keywords/network-topology","display_name":"Network topology","score":0.4223648011684418},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.34760093688964844},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3343052268028259},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.3139268755912781},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2833266854286194},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.11392772197723389},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09986099600791931},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07539346814155579}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7293477654457092},{"id":"https://openalex.org/C513985346","wikidata":"https://www.wikidata.org/wiki/Q270471","display_name":"Virtualization","level":3,"score":0.6733099818229675},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.5213662385940552},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.4223648011684418},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34760093688964844},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3343052268028259},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3139268755912781},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2833266854286194},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.11392772197723389},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09986099600791931},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07539346814155579}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695053.3731002","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731002","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731002","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3695053.3731002","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731002","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731002","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411486116.pdf","grobid_xml":"https://content.openalex.org/works/W4411486116.grobid-xml"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W1508947956","https://openalex.org/W1597213869","https://openalex.org/W1969483458","https://openalex.org/W1996212904","https://openalex.org/W2005574683","https://openalex.org/W2010936531","https://openalex.org/W2013461472","https://openalex.org/W2067231500","https://openalex.org/W2093832531","https://openalex.org/W2097117768","https://openalex.org/W2122485954","https://openalex.org/W2124365587","https://openalex.org/W2131726714","https://openalex.org/W2135651334","https://openalex.org/W2139875443","https://openalex.org/W2170607286","https://openalex.org/W2170994367","https://openalex.org/W2171793220","https://openalex.org/W2194775991","https://openalex.org/W2209598443","https://openalex.org/W2330744268","https://openalex.org/W2342427079","https://openalex.org/W2521036658","https://openalex.org/W2604514113","https://openalex.org/W2606722458","https://openalex.org/W2734941459","https://openalex.org/W2884267664","https://openalex.org/W2962970995","https://openalex.org/W2980104813","https://openalex.org/W3007788310","https://openalex.org/W3010640493","https://openalex.org/W3011167962","https://openalex.org/W3016113124","https://openalex.org/W3027968530","https://openalex.org/W3213528054","https://openalex.org/W4220662607","https://openalex.org/W4229646679","https://openalex.org/W4236713805","https://openalex.org/W4239965559","https://openalex.org/W4281708879","https://openalex.org/W4297097426","https://openalex.org/W4318776735","https://openalex.org/W4376130831","https://openalex.org/W4380881153","https://openalex.org/W4380881154","https://openalex.org/W4381611549","https://openalex.org/W4389500292","https://openalex.org/W4401211704","https://openalex.org/W4401212197"],"related_works":["https://openalex.org/W2086397253","https://openalex.org/W3169265188","https://openalex.org/W2133122801","https://openalex.org/W2267484144","https://openalex.org/W600422426","https://openalex.org/W1907653724","https://openalex.org/W2007156430","https://openalex.org/W4317418707","https://openalex.org/W3081478936","https://openalex.org/W2096272573"],"abstract_inverted_index":{"With":[0],"the":[1,54,81,87,138,148,200,209,213],"rapid":[2],"development":[3],"of":[4,12,56,166,193],"artificial":[5],"intelligence":[6],"(AI)":[7],"applications,":[8],"an":[9,170],"emerging":[10],"class":[11],"AI":[13],"accelerators,":[14],"termed":[15],"Inter-core":[16],"Connected":[17],"Neural":[18],"Processing":[19],"Units":[20],"(NPU),":[21],"has":[22,67,75],"been":[23],"adopted":[24],"in":[25,206],"both":[26,169],"cloud":[27],"and":[28,107,132,141,174,197,202],"edge":[29],"computing":[30],"environments,":[31],"like":[32],"Graphcore":[33],"IPU,":[34],"Tenstorrent,":[35],"etc.Despite":[36],"their":[37],"innovative":[38],"design,":[39],"these":[40],"NPUs":[41,79],"often":[42],"demand":[43],"substantial":[44],"hardware":[45,57,82,214],"resources,":[46],"leading":[47],"to":[48,53,114,126,195,208],"suboptimal":[49],"resource":[50,157],"utilization":[51,158],"due":[52],"imbalance":[55],"requirements":[58],"across":[59],"various":[60],"tasks.To":[61],"address":[62],"this":[63],"issue,":[64],"prior":[65],"research":[66],"explored":[68],"virtualization":[69,90,211],"techniques":[70],"for":[71,92,130,199],"monolithic":[72],"NPUs,":[73,95,188],"but":[74],"neglected":[76],"inter-core":[77,93],"connected":[78,94],"with":[80,159],"topology.This":[83],"paper":[84],"introduces":[85],"vNPU,":[86],"first":[88],"comprehensive":[89],"design":[91],"integrating":[96],"three":[97],"novel":[98],"techniques:":[99],"(1)":[100],"NPU":[101,112,122,134,184],"route":[102],"virtualization,":[103,124],"which":[104,146],"redirects":[105],"instruction":[106],"data":[108],"flow":[109],"from":[110,151],"virtual":[111,119,154,187],"cores":[113],"physical":[115],"ones,":[116],"creating":[117],"a":[118,164,175],"topology;":[120],"(2)":[121],"memory":[123,139],"designed":[125],"minimize":[127],"translation":[128],"stalls":[129],"SRAM-centric":[131],"NoC-equipped":[133],"cores,":[135],"thereby":[136],"maximizing":[137],"bandwidth;":[140],"(3)":[142],"Besteffort":[143],"topology":[144],"mapping,":[145],"determines":[147],"optimal":[149],"mapping":[150],"all":[152],"candidate":[153],"topologies,":[155],"balancing":[156],"end-to-end":[160],"performance.We":[161],"have":[162],"developed":[163],"prototype":[165],"vNPU":[167,189],"on":[168,186],"FPGA":[171],"platform":[172],"(Chipyard+FireSim)":[173],"simulator":[176],"(DCRA).Evaluation":[177],"results":[178],"demonstrate":[179],"that":[180],"when":[181],"executing":[182],"multiple":[183],"workloads":[185],"achieves":[190],"performance":[191,215],"improvements":[192],"up":[194],"1.92x":[196],"1.28x":[198],"Transformer":[201],"ResNet":[203],"models,":[204],"respectively,":[205],"comparison":[207],"MIG-based":[210],"method.Furthermore,":[212]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
