{"id":"https://openalex.org/W3175880846","doi":"https://doi.org/10.1109/aicas51828.2021.9458548","title":"An Energy-Efficient Hardware Accelerator for Hierarchical Deep Reinforcement Learning","display_name":"An Energy-Efficient Hardware Accelerator for Hierarchical Deep Reinforcement Learning","publication_year":2021,"publication_date":"2021-06-06","ids":{"openalex":"https://openalex.org/W3175880846","doi":"https://doi.org/10.1109/aicas51828.2021.9458548","mag":"3175880846"},"language":"en","primary_location":{"id":"doi:10.1109/aicas51828.2021.9458548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas51828.2021.9458548","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE 3rd International Conference on Artificial Intelligence Circuits and Systems (AICAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067589584","display_name":"Aidin Shiri","orcid":"https://orcid.org/0000-0001-5402-0988"},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aidin Shiri","raw_affiliation_strings":["University of Maryland Baltimore County"],"affiliations":[{"raw_affiliation_string":"University of Maryland Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079352628","display_name":"Bharat Prakash","orcid":null},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bharat Prakash","raw_affiliation_strings":["University of Maryland Baltimore County"],"affiliations":[{"raw_affiliation_string":"University of Maryland Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007957361","display_name":"Arnab Neelim Mazumder","orcid":"https://orcid.org/0000-0002-9550-7917"},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arnab Neelim Mazumder","raw_affiliation_strings":["University of Maryland Baltimore County"],"affiliations":[{"raw_affiliation_string":"University of Maryland Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079459509","display_name":"Nicholas R. Waytowich","orcid":"https://orcid.org/0000-0002-3786-0675"},"institutions":[{"id":"https://openalex.org/I166416128","display_name":"DEVCOM Army Research Laboratory","ror":"https://ror.org/011hc8f90","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I166416128","https://openalex.org/I2802705668","https://openalex.org/I4210154437"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nicholas R. Waytowich","raw_affiliation_strings":["US Army Research Laboratory"],"affiliations":[{"raw_affiliation_string":"US Army Research Laboratory","institution_ids":["https://openalex.org/I166416128"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114778025","display_name":"Tim Oates","orcid":"https://orcid.org/0000-0002-8655-747X"},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tim Oates","raw_affiliation_strings":["University of Maryland Baltimore County"],"affiliations":[{"raw_affiliation_string":"University of Maryland Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084010501","display_name":"Tinoosh Mohsenin","orcid":"https://orcid.org/0000-0001-5551-2124"},"institutions":[{"id":"https://openalex.org/I79272384","display_name":"University of Maryland, Baltimore County","ror":"https://ror.org/02qskvh78","country_code":"US","type":"education","lineage":["https://openalex.org/I79272384"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tinoosh Mohsenin","raw_affiliation_strings":["University of Maryland Baltimore County"],"affiliations":[{"raw_affiliation_string":"University of Maryland Baltimore County","institution_ids":["https://openalex.org/I79272384"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5067589584"],"corresponding_institution_ids":["https://openalex.org/I79272384"],"apc_list":null,"apc_paid":null,"fwci":0.2719,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6211915,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8371874094009399},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.801205039024353},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.6122720837593079},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5612030625343323},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5423181653022766},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.5241682529449463},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5153878927230835},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5150730013847351},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.46327874064445496},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.4387354254722595},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.41624200344085693},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39464765787124634},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.36475127935409546},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3559976816177368},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.12149849534034729}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8371874094009399},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.801205039024353},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.6122720837593079},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5612030625343323},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5423181653022766},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.5241682529449463},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5153878927230835},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5150730013847351},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.46327874064445496},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.4387354254722595},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.41624200344085693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39464765787124634},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.36475127935409546},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3559976816177368},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.12149849534034729},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aicas51828.2021.9458548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aicas51828.2021.9458548","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE 3rd International Conference on Artificial Intelligence Circuits and Systems (AICAS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.800000011920929,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320316514","display_name":"Arm","ror":"https://ror.org/04mmhzs81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1515851193","https://openalex.org/W2604814848","https://openalex.org/W2736601468","https://openalex.org/W2795444169","https://openalex.org/W2923223302","https://openalex.org/W2924168890","https://openalex.org/W2944908380","https://openalex.org/W2949760349","https://openalex.org/W2964227312","https://openalex.org/W3083734093","https://openalex.org/W3101516857","https://openalex.org/W3122795696","https://openalex.org/W3134132041","https://openalex.org/W3197719881","https://openalex.org/W6741002519","https://openalex.org/W6760639414","https://openalex.org/W6761947911","https://openalex.org/W6903759691"],"related_works":["https://openalex.org/W2770234245","https://openalex.org/W96612179","https://openalex.org/W4229499248","https://openalex.org/W2566006169","https://openalex.org/W1567818861","https://openalex.org/W2987774938","https://openalex.org/W4256492088","https://openalex.org/W632915154","https://openalex.org/W2055733372","https://openalex.org/W4283700541"],"abstract_inverted_index":{"Reinforcement":[0],"Learning":[1],"(RL)":[2],"has":[3],"shown":[4],"great":[5],"performance":[6],"in":[7,13,29],"solving":[8],"sequential":[9],"decision-making":[10],"and":[11,33,78,146,157,207,232,245],"control":[12,140],"dynamic":[14],"environments":[15],"problems.":[16],"Despite":[17],"its":[18],"achievements,":[19],"training":[20,144],"Deep":[21],"Neural":[22],"Network":[23],"(DNN)":[24],"based":[25,124],"RL":[26,94,109,122,142,186],"is":[27,83,110,163,178,193],"expensive":[28],"terms":[30],"of":[31,36,40,59,89,104,139,183,225,235],"time":[32],"power":[34],"because":[35],"the":[37,52,55,71,87,93,102,126,147,167,190,220,229,236,241],"large":[38],"number":[39,88],"episodes":[41],"required":[42],"to":[43,96,152,268,275],"train":[44],"agents":[45,123,143],"with":[46,101,136,172,240,256],"high":[47],"dimensional":[48],"image":[49],"representations.":[50],"At":[51],"deployment":[53,73],"also,":[54],"massive":[56],"energy":[57,233,259,278],"footprint":[58],"deep":[60],"neural":[61,203],"networks":[62],"can":[63],"be":[64],"a":[65,117,153,194,201],"major":[66],"drawback.":[67],"Embedded":[68],"devices":[69],"as":[70,166,228],"main":[72],"platform,":[74],"are":[75,238],"intrinsically":[76],"resource-constrained":[77],"deploying":[79],"DNN":[80],"on":[81,125,263],"them":[82],"challenging.":[84],"Consequently,":[85],"reducing":[86],"actions":[90],"taken":[91],"by":[92],"agent":[95,148],"learn":[97],"desired":[98],"policy,":[99],"along":[100],"development":[103],"efficient":[105,181],"hardware":[106,119,208,217],"architectures":[107],"for":[108,121,180,185,219],"crucial.":[111],"In":[112],"this":[113],"paper,":[114],"we":[115,212],"propose":[116],"novel":[118],"architecture":[120,205],"learning":[127,135,182],"hierarchical":[128,134,155,221],"policies":[129,184],"method.":[130],"We":[131],"show":[132],"that":[133],"several":[137],"levels":[138],"improves":[141],"efficiency":[145],"converges":[149],"faster":[150],"compared":[151],"none":[154],"model":[156,246],"therefore":[158],"using":[159],"less":[160],"power.":[161],"This":[162],"especially":[164,188],"true":[165],"environment":[168],"becomes":[169],"more":[170],"complex":[171],"multiple":[173],"objective":[174],"sub-goals.":[175],"Our":[176],"method":[177],"important":[179],"agent,":[187],"when":[189],"target":[191],"platform":[192],"resource":[195],"constraint":[196],"embedded":[197],"device.":[198],"By":[199],"performing":[200],"systematic":[202],"network":[204],"search":[206],"design":[209,272],"space":[210],"exploration,":[211],"implemented":[213],"an":[214],"energy-efficient":[215,250],"scalable":[216],"accelerator":[218,237],"RL.":[222],"Hardware":[223],"factors":[224],"merit":[226],"such":[227],"latency,":[230],"throughput,":[231],"consumption":[234,260],"evaluated":[239],"various":[242],"processing":[243],"elements,":[244],"parameters.":[247],"The":[248],"most":[249],"configuration":[251],"achieves":[252],"139":[253],"fps":[254],"throughput":[255],"5.8":[257],"mJ":[258],"per":[261],"classification":[262],"Xilinx":[264],"Artix-7":[265],"FPGA.":[266],"Compared":[267],"similar":[269],"works":[270],"our":[271],"shows":[273],"up":[274],"3x":[276],"better":[277],"efficiency.":[279]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
