{"id":"https://openalex.org/W4366386429","doi":"https://doi.org/10.1109/tc.2023.3268400","title":"A Unified Parallel CORDIC-Based Hardware Architecture for LSTM Network Acceleration","display_name":"A Unified Parallel CORDIC-Based Hardware Architecture for LSTM Network Acceleration","publication_year":2023,"publication_date":"2023-04-19","ids":{"openalex":"https://openalex.org/W4366386429","doi":"https://doi.org/10.1109/tc.2023.3268400"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2023.3268400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2023.3268400","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002093997","display_name":"Nadya A. Mohamed","orcid":"https://orcid.org/0000-0001-9887-8345"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Nadya A. Mohamed","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Rice University, Houston, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Rice University, Houston, TX, USA","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085983829","display_name":"Joseph R. Cavallaro","orcid":"https://orcid.org/0000-0002-9841-1806"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joseph R. Cavallaro","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Rice University, Houston, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Rice University, Houston, TX, USA","institution_ids":["https://openalex.org/I74775410"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5002093997"],"corresponding_institution_ids":["https://openalex.org/I74775410"],"apc_list":null,"apc_paid":null,"fwci":3.165,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.92964574,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"72","issue":"10","first_page":"2752","last_page":"2766"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7590494751930237},{"id":"https://openalex.org/keywords/cordic","display_name":"CORDIC","score":0.6226092576980591},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5757476091384888},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.49947142601013184},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.49042388796806335},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4499635398387909},{"id":"https://openalex.org/keywords/hardware-architecture","display_name":"Hardware architecture","score":0.44928810000419617},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4446372985839844},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4440969228744507},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.4312424063682556},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.41649624705314636},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39840051531791687},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32883667945861816},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2697991728782654},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16254782676696777},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11315903067588806},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.10461926460266113}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7590494751930237},{"id":"https://openalex.org/C58870171","wikidata":"https://www.wikidata.org/wiki/Q116076","display_name":"CORDIC","level":3,"score":0.6226092576980591},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5757476091384888},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.49947142601013184},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.49042388796806335},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4499635398387909},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.44928810000419617},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4446372985839844},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4440969228744507},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4312424063682556},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.41649624705314636},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39840051531791687},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32883667945861816},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2697991728782654},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16254782676696777},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11315903067588806},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.10461926460266113},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2023.3268400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2023.3268400","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1861840650","display_name":null,"funder_award_id":"CNS-2016727","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"},{"id":"https://openalex.org/G6210897874","display_name":null,"funder_award_id":"CNS-1827940","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"}],"funders":[{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1985956780","https://openalex.org/W1998399571","https://openalex.org/W2064675550","https://openalex.org/W2088372221","https://openalex.org/W2111013824","https://openalex.org/W2115452265","https://openalex.org/W2119144962","https://openalex.org/W2135297695","https://openalex.org/W2136045037","https://openalex.org/W2276892413","https://openalex.org/W2527036487","https://openalex.org/W2557257847","https://openalex.org/W2585720638","https://openalex.org/W2766338242","https://openalex.org/W2767644592","https://openalex.org/W2768993447","https://openalex.org/W2800017313","https://openalex.org/W2845210056","https://openalex.org/W2884001105","https://openalex.org/W2890165800","https://openalex.org/W2912581782","https://openalex.org/W2915106038","https://openalex.org/W2943076207","https://openalex.org/W2962962672","https://openalex.org/W2963145956","https://openalex.org/W2970233825","https://openalex.org/W2986975783","https://openalex.org/W3023146441","https://openalex.org/W3027324516","https://openalex.org/W3035768610","https://openalex.org/W3036878841","https://openalex.org/W3044794971","https://openalex.org/W3048414096","https://openalex.org/W3093799822","https://openalex.org/W3114397406","https://openalex.org/W3120434317","https://openalex.org/W3213656977","https://openalex.org/W4200442993","https://openalex.org/W4214845308","https://openalex.org/W4247198796","https://openalex.org/W4288346545","https://openalex.org/W6677580257","https://openalex.org/W6730061913","https://openalex.org/W6763653508","https://openalex.org/W6766961549"],"related_works":["https://openalex.org/W2147668509","https://openalex.org/W4205481467","https://openalex.org/W3207407077","https://openalex.org/W4383503138","https://openalex.org/W3131592046","https://openalex.org/W2010977501","https://openalex.org/W2009779270","https://openalex.org/W3214999411","https://openalex.org/W3170442433","https://openalex.org/W2965577915"],"abstract_inverted_index":{"Deep":[0],"Neural":[1,25],"Networks":[2,26],"(DNNs)":[3],"have":[4],"recently":[5],"become":[6],"the":[7,71,92,156,164,172,201],"standard":[8],"tool":[9],"for":[10,66,125,191,241],"solving":[11],"various":[12,192],"practical":[13],"problems":[14],"in":[15,97,163],"a":[16,35,59,85,108,134,143],"wide":[17],"range":[18],"of":[19,37,52,61,74,137],"applications":[20,99],"with":[21,39,116],"state-of-the-art":[22],"performance.":[23],"Recurrent":[24],"(RNNs)":[27],"such":[28],"as":[29],"Long":[30],"Short-Term":[31],"Memory":[32],"(LSTM)":[33],"are":[34,153],"subset":[36],"DNNs":[38],"fully":[40,183],"connected":[41,184],"single":[42,145],"or":[43],"multi-layer":[44],"networks.":[45],"The":[46,129,196,212,227],"complex":[47],"neurons":[48],"and":[49,80,120,142,151,166,181,222,232],"internal":[50],"states":[51],"LSTM":[53,75,126],"networks":[54],"enable":[55],"them":[56,64],"to":[57,91,178],"build":[58],"memory":[60],"events,":[62],"making":[63,188],"ideal":[65],"time":[67,95,209],"series":[68,210],"applications.":[69,195],"Despite":[70],"great":[72],"potential":[73],"networks,":[76],"their":[77],"heterogeneous":[78],"operations":[79],"computational":[81],"resource":[82],"requirements":[83],"create":[84],"vast":[86],"gap":[87],"when":[88],"it":[89,189,239],"comes":[90],"fast":[93],"processing":[94,139],"required":[96],"real-time":[98],"using":[100,155,206],"low-power,":[101],"low-cost":[102],"edge":[103,194,243],"devices.":[104],"This":[105],"work":[106],"proposes":[107],"novel":[109],"hardware":[110,130,174],"architecture":[111,197],"that":[112],"combines":[113],"serial-parallel":[114],"computation":[115],"matrix":[117],"algebra":[118],"concepts":[119],"efficient":[121],"low-power":[122,193],"computer":[123,160],"arithmetics":[124],"network":[127],"acceleration.":[128],"is":[131,198],"based":[132],"on":[133,200],"systolic":[135],"ring":[136],"outer-product-based":[138],"elements":[140],"(PEs)":[141],"reusable":[144],"activation":[146],"function":[147],"block":[148],"(AFB).":[149],"PEs":[150],"AFB":[152],"implemented":[154,213],"coordinate":[157],"rotation":[158],"digital":[159],"algorithm":[161],"(CORDIC)":[162],"linear":[165],"hyperbolic":[167],"modes.":[168],"Unlike":[169],"most":[170],"approaches,":[171],"proposed":[173,228],"can":[175],"be":[176],"configured":[177],"perform":[179],"recurrent":[180],"non-recurrent":[182],"layers":[185],"(FC)":[186],"computations,":[187],"suitable":[190,240],"validated":[199],"Xilinx":[202],"PYNQ-Z1":[203],"development":[204],"board":[205],"an":[207],"open-source":[208],"dataset.":[211],"design":[214],"achieves":[215],"<inline-formula><tex-math":[216,223,233],"notation=\"LaTeX\">$\\text{114}":[217],"\\mu":[218],"\\text{s}$</tex-math></inline-formula>":[219],"average":[220],"latency":[221,231],"notation=\"LaTeX\">$\\text{1.8":[224],"GOPS}$</tex-math></inline-formula>":[225],"throughput.":[226],"design's":[229],"low":[230],"notation=\"LaTeX\">$\\text{0.438":[234],"W}$</tex-math></inline-formula>":[235],"power":[236],"consumption":[237],"makes":[238],"resource-constrained":[242],"platforms.":[244]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
