{"id":"https://openalex.org/W4415285679","doi":"https://doi.org/10.1145/3725843.3756038","title":"ReGate: Enabling Power Gating in Neural Processing Units","display_name":"ReGate: Enabling Power Gating in Neural Processing Units","publication_year":2025,"publication_date":"2025-10-17","ids":{"openalex":"https://openalex.org/W4415285679","doi":"https://doi.org/10.1145/3725843.3756038"},"language":"en","primary_location":{"id":"doi:10.1145/3725843.3756038","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725843.3756038","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3725843.3756038","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3725843.3756038","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101548931","display_name":"Yuqi Xue","orcid":"https://orcid.org/0009-0002-0363-9486"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuqi Xue","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Urbana, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Urbana, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005380910","display_name":"Jian Huang","orcid":"https://orcid.org/0000-0002-1125-671X"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Huang","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Urbana, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Urbana, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101548931"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":4.3794,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94702602,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1160","last_page":"1177"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.5008999705314636},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4900999963283539},{"id":"https://openalex.org/keywords/power-gating","display_name":"Power gating","score":0.4632999897003174},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.44699999690055847},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.44290000200271606},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4316999912261963},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.41999998688697815},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/fifo","display_name":"FIFO (computing and electronics)","score":0.39879998564720154},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39820000529289246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6668999791145325},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.5687999725341797},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.5008999705314636},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4900999963283539},{"id":"https://openalex.org/C2780700455","wikidata":"https://www.wikidata.org/wiki/Q7236515","display_name":"Power gating","level":4,"score":0.4632999897003174},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4480000138282776},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.44699999690055847},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.44290000200271606},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4316999912261963},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.41999998688697815},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C2777145635","wikidata":"https://www.wikidata.org/wiki/Q515636","display_name":"FIFO (computing and electronics)","level":2,"score":0.39879998564720154},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39820000529289246},{"id":"https://openalex.org/C2778774385","wikidata":"https://www.wikidata.org/wiki/Q4437810","display_name":"Power management","level":3,"score":0.39719998836517334},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.36980000138282776},{"id":"https://openalex.org/C68043766","wikidata":"https://www.wikidata.org/wiki/Q267416","display_name":"Static random-access memory","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C16320812","wikidata":"https://www.wikidata.org/wiki/Q1812200","display_name":"Idle","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3262999951839447},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.32499998807907104},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.3098999857902527},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C45872418","wikidata":"https://www.wikidata.org/wiki/Q5318966","display_name":"Dynamic demand","level":3,"score":0.29670000076293945},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2946000099182129},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2867000102996826},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C123745756","wikidata":"https://www.wikidata.org/wiki/Q1665949","display_name":"Interconnection","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.2694999873638153},{"id":"https://openalex.org/C117551214","wikidata":"https://www.wikidata.org/wiki/Q6692774","display_name":"Low-power electronics","level":4,"score":0.265500009059906},{"id":"https://openalex.org/C131017901","wikidata":"https://www.wikidata.org/wiki/Q170451","display_name":"Logic gate","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.25679999589920044}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3725843.3756038","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725843.3756038","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3725843.3756038","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2508.02536","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.02536","pdf_url":"https://arxiv.org/pdf/2508.02536","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3725843.3756038","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725843.3756038","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3725843.3756038","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1975624819","display_name":null,"funder_award_id":"62025404, 62222411","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2083879895","display_name":null,"funder_award_id":"CNS-2144796","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2460662635","display_name":null,"funder_award_id":"2144796","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3379715643","display_name":null,"funder_award_id":"2023YFB4404400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6671297155","display_name":null,"funder_award_id":"CAREER","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320317153","display_name":"DeepMind","ror":"https://ror.org/00971b260"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320332222","display_name":"University of Illinois at Urbana-Champaign","ror":"https://ror.org/047426m28"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415285679.pdf","grobid_xml":"https://content.openalex.org/works/W4415285679.grobid-xml"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W1996037679","https://openalex.org/W2002991280","https://openalex.org/W2030338488","https://openalex.org/W2068766249","https://openalex.org/W2126357937","https://openalex.org/W2131054871","https://openalex.org/W2158899676","https://openalex.org/W2163543978","https://openalex.org/W2170382128","https://openalex.org/W2346205343","https://openalex.org/W2492295191","https://openalex.org/W2513715959","https://openalex.org/W2516501416","https://openalex.org/W2625103132","https://openalex.org/W2734941459","https://openalex.org/W2917720315","https://openalex.org/W3015213341","https://openalex.org/W3127736057","https://openalex.org/W3133507165","https://openalex.org/W3157218387","https://openalex.org/W3159185005","https://openalex.org/W3172967059","https://openalex.org/W3190062760","https://openalex.org/W3206857466","https://openalex.org/W3212679402","https://openalex.org/W4200362663","https://openalex.org/W4281971810","https://openalex.org/W4283704460","https://openalex.org/W4312639064","https://openalex.org/W4360605545","https://openalex.org/W4380874786","https://openalex.org/W4380881153","https://openalex.org/W4381611549","https://openalex.org/W4382142107","https://openalex.org/W4392745848","https://openalex.org/W4394998532","https://openalex.org/W4399566540","https://openalex.org/W4401211558","https://openalex.org/W4402427803","https://openalex.org/W4404400593","https://openalex.org/W4404400711","https://openalex.org/W4404954356","https://openalex.org/W4408860742","https://openalex.org/W4408894415","https://openalex.org/W4409248734","https://openalex.org/W4409356500","https://openalex.org/W4411485910"],"related_works":[],"abstract_inverted_index":{"The":[0],"energy":[1,29,206],"efficiency":[2],"of":[3,21,27,41,58,105,136,208,227],"neural":[4],"processing":[5,137],"units":[6,166],"(NPU)":[7],"plays":[8],"a":[9,160,195],"critical":[10],"role":[11],"in":[12,45,62,76,86,102,145,235],"developing":[13],"sustainable":[14],"data":[15],"centers.Our":[16],"study":[17],"with":[18,65,218],"different":[19],"generations":[20],"NPU":[22,47,63,106,181,197,209,236],"chips":[23,64,107,210],"reveals":[24],"that":[25,123,153,201],"30%-72%":[26],"their":[28],"consumption":[30,207],"is":[31],"contributed":[32],"by":[33,211],"static":[34],"power":[35,42,112,117,131,191],"dissipation,":[36],"due":[37,81],"to":[38,82,188,213],"the":[39,83,99,110,134,141,180,190,205],"lack":[40],"management":[43,113],"support":[44],"modern":[46],"chips.In":[48],"this":[49],"paper,":[50],"we":[51,96,199],"present":[52],"ReGate,":[53],"which":[54],"enables":[55,129],"fine-grained":[56],"power-gating":[57,69,75,100,228],"each":[59,103],"hardware":[60,87,225],"component":[61,104],"hardware/software":[66],"co-design.Unlike":[67],"conventional":[68],"techniques":[70],"for":[71,119],"generic":[72],"processors,":[73],"enabling":[74],"NPUs":[77],"faces":[78],"unique":[79],"challenges":[80],"fundamental":[84],"difference":[85],"architecture":[88],"and":[89,108,150,167,183],"program":[90],"execution":[91,126,144],"model.To":[92],"address":[93],"these":[94],"challenges,":[95],"carefully":[97],"investigate":[98],"opportunities":[101],"decide":[109],"best-fit":[111],"scheme":[114],"(i.e.,":[115],"hardware-vs.software-managed":[116],"gating).Specifically,":[118],"systolic":[120],"arrays":[121],"(SAs)":[122],"have":[124,154],"deterministic":[125],"patterns,":[127,177],"ReGate":[128,158,178,202],"cycle-level":[130],"gating":[132],"at":[133],"granularity":[135],"elements":[138],"(PEs)":[139],"following":[140],"inherent":[142],"dataflow":[143],"SAs.For":[146],"inter-chip":[147],"interconnect":[148],"(ICI)":[149],"HBM":[151],"controllers":[152],"long":[155],"idle":[156,170],"intervals,":[157],"employs":[159],"lightweight":[161],"hardware-based":[162],"idle-detection":[163],"mechanism.For":[164],"vector":[165],"SRAM":[168],"whose":[169],"periods":[171],"vary":[172],"significantly":[173],"depending":[174],"on":[175,194,216,221],"workload":[176,223],"extends":[179],"ISA":[182],"allows":[184],"software":[185],"(e.g.,":[186],"compilers)":[187],"manage":[189],"gating.With":[192],"implementation":[193,226],"production-level":[196],"simulator,":[198],"show":[200],"can":[203],"reduce":[204],"up":[212],"32.8%":[214],"(15.5%":[215],"average),":[217],"negligible":[219],"impact":[220],"AI":[222],"performance.The":[224],"logic":[229],"introduces":[230],"less":[231],"than":[232],"3.3%":[233],"overhead":[234],"chips.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-17T00:00:00"}
