{"id":"https://openalex.org/W4413278563","doi":"https://doi.org/10.1109/icfpt64416.2024.11113396","title":"HEPPO: Hardware-Efficient Proximal Policy Optimization a Universal Pipelined Architecture for Generalized Advantage Estimation","display_name":"HEPPO: Hardware-Efficient Proximal Policy Optimization a Universal Pipelined Architecture for Generalized Advantage Estimation","publication_year":2024,"publication_date":"2024-12-10","ids":{"openalex":"https://openalex.org/W4413278563","doi":"https://doi.org/10.1109/icfpt64416.2024.11113396"},"language":"en","primary_location":{"id":"doi:10.1109/icfpt64416.2024.11113396","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icfpt64416.2024.11113396","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Field Programmable Technology (ICFPT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Hazem Taha","orcid":null},"institutions":[{"id":"https://openalex.org/I98251732","display_name":"McMaster University","ror":"https://ror.org/02fa3aq29","country_code":"CA","type":"education","lineage":["https://openalex.org/I98251732"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Hazem Taha","raw_affiliation_strings":["McMaster University,Hamilton,Ontario,L8S 4L8"],"affiliations":[{"raw_affiliation_string":"McMaster University,Hamilton,Ontario,L8S 4L8","institution_ids":["https://openalex.org/I98251732"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005226229","display_name":"Ameer Abdelhadi","orcid":"https://orcid.org/0000-0003-4683-8901"},"institutions":[{"id":"https://openalex.org/I98251732","display_name":"McMaster University","ror":"https://ror.org/02fa3aq29","country_code":"CA","type":"education","lineage":["https://openalex.org/I98251732"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ameer M. S. Abdelhadi","raw_affiliation_strings":["McMaster University,Hamilton,Ontario,L8S 4L8"],"affiliations":[{"raw_affiliation_string":"McMaster University,Hamilton,Ontario,L8S 4L8","institution_ids":["https://openalex.org/I98251732"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I98251732"],"apc_list":null,"apc_paid":null,"fwci":0.4787,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69183877,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"01","last_page":"09"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9222000241279602,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9222000241279602,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7493579387664795},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.562846302986145},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5100436806678772},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4770566523075104},{"id":"https://openalex.org/keywords/hardware-architecture","display_name":"Hardware architecture","score":0.44269391894340515},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.4189060628414154},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.4164285659790039},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.39176177978515625},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13334739208221436},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.1247217059135437},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07476577162742615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7493579387664795},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.562846302986145},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5100436806678772},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4770566523075104},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.44269391894340515},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4189060628414154},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.4164285659790039},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.39176177978515625},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13334739208221436},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.1247217059135437},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07476577162742615},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icfpt64416.2024.11113396","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icfpt64416.2024.11113396","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Field Programmable Technology (ICFPT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2067418954","https://openalex.org/W2767976779","https://openalex.org/W3003961668","https://openalex.org/W3035681682","https://openalex.org/W3107420481","https://openalex.org/W3168385254","https://openalex.org/W3205232001","https://openalex.org/W3213742958","https://openalex.org/W4281660701","https://openalex.org/W4394998398","https://openalex.org/W4403124238"],"related_works":["https://openalex.org/W2576994247","https://openalex.org/W2608353378","https://openalex.org/W4249206767","https://openalex.org/W2563559453","https://openalex.org/W2382330008","https://openalex.org/W1519970947","https://openalex.org/W2544482289","https://openalex.org/W3152699334","https://openalex.org/W2154401804","https://openalex.org/W2127430515"],"abstract_inverted_index":{"This":[0,48,87],"paper":[1],"introduces":[2],"HEPPO,":[3],"an":[4],"FPGA-based":[5],"accelerator":[6],"designed":[7],"to":[8],"optimize":[9],"the":[10,52],"Generalized":[11],"Advantage":[12],"Estimation":[13],"(GAE)":[14],"stage":[15],"in":[16,101,111,160,167,177],"Proximal":[17],"Policy":[18],"Optimization":[19],"(PPO).":[20],"Unlike":[21],"previous":[22],"approaches":[23],"that":[24],"focused":[25],"on":[26,43,118],"trajectory":[27],"collection":[28],"and":[29,76,93,104,126,145,163],"actor-critic":[30],"updates,":[31],"HEPPO":[32],"addresses":[33],"GAE's":[34],"computational":[35],"demands":[36],"with":[37,123],"a":[38,44,98,105,116,119,156,164],"parallel,":[39],"pipelined":[40],"architecture":[41],"implemented":[42],"single":[45,120],"System-on-Chip":[46],"(SoC).":[47],"design":[49],"allows":[50],"for":[51,59,79,174],"adaptation":[53],"of":[54,132],"various":[55],"hardware":[56],"accelerators":[57],"tailored":[58],"different":[60],"PPO":[61,150,161],"phases.":[62],"A":[63],"key":[64],"innovation":[65],"is":[66],"our":[67],"strategic":[68],"standardization":[69,75,78],"technique,":[70],"which":[71],"combines":[72],"dynamic":[73],"reward":[74],"block":[77],"values,":[80],"followed":[81],"by":[82],"8":[83],"-bit":[84],"uniform":[85],"quantization.":[86],"method":[88],"stabilizes":[89],"learning,":[90],"enhances":[91],"performance,":[92],"manages":[94],"memory":[95,102,168],"bottlenecks,":[96,147],"achieving":[97],"4x":[99],"reduction":[100,166],"usage":[103],"<tex":[106],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[107],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.5":[108],"x$</tex>":[109],"increase":[110,159],"cumulative":[112],"rewards.":[113],"We":[114],"propose":[115],"solution":[117,141],"SoC":[121],"device":[122],"programmable":[124],"logic":[125],"embedded":[127],"processors,":[128],"delivering":[129],"throughput":[130,146],"orders":[131],"magnitude":[133],"higher":[134],"than":[135],"traditional":[136],"CPUGPU":[137],"systems.":[138],"Our":[139],"single-chip":[140],"minimizes":[142],"communication":[143],"latency":[144],"significantly":[148],"boosting":[149],"training":[151],"efficiency.":[152],"Experimental":[153],"results":[154],"show":[155],"30":[157],"%":[158],"speed":[162],"substantial":[165],"access":[169],"time,":[170],"underscoring":[171],"HEPPO's":[172],"potential":[173],"broad":[175],"applicability":[176],"hardware-efficient":[178],"reinforcement":[179],"learning":[180],"algorithms.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
