{"id":"https://openalex.org/W3202164011","doi":"https://doi.org/10.1109/tetc.2021.3115475","title":"Towards Execution-Efficient LSTMs via Hardware-Guided Grow-and-Prune Paradigm","display_name":"Towards Execution-Efficient LSTMs via Hardware-Guided Grow-and-Prune Paradigm","publication_year":2021,"publication_date":"2021-10-02","ids":{"openalex":"https://openalex.org/W3202164011","doi":"https://doi.org/10.1109/tetc.2021.3115475","mag":"3202164011"},"language":"en","primary_location":{"id":"doi:10.1109/tetc.2021.3115475","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetc.2021.3115475","pdf_url":null,"source":{"id":"https://openalex.org/S2496326734","display_name":"IEEE Transactions on Emerging Topics in Computing","issn_l":"2168-6750","issn":["2168-6750","2376-4562"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002444694","display_name":"Hongxu Yin","orcid":"https://orcid.org/0000-0002-6481-6389"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hongxu Yin","raw_affiliation_strings":["Department of Electrical Engineering, Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103032013","display_name":"Guoyang Chen","orcid":"https://orcid.org/0000-0002-5531-8986"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guoyang Chen","raw_affiliation_strings":["Alibaba Group US Inc., Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Alibaba Group US Inc., Sunnyvale, CA, USA","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101409982","display_name":"Yingmin Li","orcid":"https://orcid.org/0000-0003-0079-4130"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yingmin Li","raw_affiliation_strings":["Alibaba Group US Inc., Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Alibaba Group US Inc., Sunnyvale, CA, USA","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018356320","display_name":"Shuai Che","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuai Che","raw_affiliation_strings":["Alibaba Group US Inc., Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Alibaba Group US Inc., Sunnyvale, CA, USA","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100426678","display_name":"Weifeng Zhang","orcid":"https://orcid.org/0000-0002-4529-1679"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weifeng Zhang","raw_affiliation_strings":["Alibaba Group US Inc., Sunnyvale, CA, USA"],"affiliations":[{"raw_affiliation_string":"Alibaba Group US Inc., Sunnyvale, CA, USA","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086131079","display_name":"Niraj K. Jha","orcid":"https://orcid.org/0000-0002-1539-0369"},"institutions":[{"id":"https://openalex.org/I20089843","display_name":"Princeton University","ror":"https://ror.org/00hx57361","country_code":"US","type":"education","lineage":["https://openalex.org/I20089843"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Niraj K. Jha","raw_affiliation_strings":["Department of Electrical Engineering, Princeton University, Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Princeton University, Princeton, NJ, USA","institution_ids":["https://openalex.org/I20089843"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002444694"],"corresponding_institution_ids":["https://openalex.org/I20089843"],"apc_list":null,"apc_paid":null,"fwci":0.2719,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.63599768,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"10","issue":"4","first_page":"1799","last_page":"1809"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9031105041503906},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6154413819313049},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5754876136779785},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.516975998878479},{"id":"https://openalex.org/keywords/treebank","display_name":"Treebank","score":0.4562685191631317},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4518080949783325},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.44212037324905396},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4268215000629425},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.18106910586357117},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.17412742972373962}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9031105041503906},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6154413819313049},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5754876136779785},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.516975998878479},{"id":"https://openalex.org/C206134035","wikidata":"https://www.wikidata.org/wiki/Q811525","display_name":"Treebank","level":3,"score":0.4562685191631317},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4518080949783325},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.44212037324905396},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4268215000629425},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.18106910586357117},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.17412742972373962},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetc.2021.3115475","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetc.2021.3115475","pdf_url":null,"source":{"id":"https://openalex.org/S2496326734","display_name":"IEEE Transactions on Emerging Topics in Computing","issn_l":"2168-6750","issn":["2168-6750","2376-4562"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W1578084252","https://openalex.org/W1591801644","https://openalex.org/W2064675550","https://openalex.org/W2091553595","https://openalex.org/W2093647425","https://openalex.org/W2127141656","https://openalex.org/W2130942839","https://openalex.org/W2157590573","https://openalex.org/W2185726469","https://openalex.org/W2193413348","https://openalex.org/W2294710185","https://openalex.org/W2554302513","https://openalex.org/W2560017826","https://openalex.org/W2754526845","https://openalex.org/W2764043458","https://openalex.org/W2765315405","https://openalex.org/W2767785892","https://openalex.org/W2769644379","https://openalex.org/W2774000609","https://openalex.org/W2792376130","https://openalex.org/W2794664194","https://openalex.org/W2885533908","https://openalex.org/W2899771611","https://openalex.org/W2905209374","https://openalex.org/W2934853022","https://openalex.org/W2945580137","https://openalex.org/W2949866178","https://openalex.org/W2951008357","https://openalex.org/W2952436057","https://openalex.org/W2963163009","https://openalex.org/W2963319203","https://openalex.org/W2963918968","https://openalex.org/W2964199361","https://openalex.org/W2980137827","https://openalex.org/W2991232789","https://openalex.org/W2995785927","https://openalex.org/W3034957837","https://openalex.org/W4232069359","https://openalex.org/W4244330903","https://openalex.org/W4297689207","https://openalex.org/W6635446068","https://openalex.org/W6638318767","https://openalex.org/W6641578900","https://openalex.org/W6679436768","https://openalex.org/W6687566353","https://openalex.org/W6730047919","https://openalex.org/W6736780897","https://openalex.org/W6744208968","https://openalex.org/W6745148473","https://openalex.org/W6746200960","https://openalex.org/W6746331415","https://openalex.org/W6746839373","https://openalex.org/W6749518548","https://openalex.org/W6756040250","https://openalex.org/W6764398373"],"related_works":["https://openalex.org/W3142119062","https://openalex.org/W159209093","https://openalex.org/W589103562","https://openalex.org/W1991220724","https://openalex.org/W2251234095","https://openalex.org/W131522978","https://openalex.org/W2964047924","https://openalex.org/W3150294986","https://openalex.org/W2962832505","https://openalex.org/W2743945814"],"abstract_inverted_index":{"Long":[0],"short-term":[1],"memory":[2],"(LSTM)":[3],"applications":[4],"need":[5],"fast":[6],"yet":[7,63],"compact":[8],"models.":[9,66,247],"Neural":[10],"network":[11,26,40,89],"compression":[12,34,102],"approaches,":[13],"e.g.,":[14],"the":[15,83,99,113,153,160,166,193,198,203],"grow-and-prune":[16],"paradigm,":[17],"have":[18,140],"proved":[19],"to":[20,47,107,152,179,214,223],"be":[21],"promising":[22],"for":[23,60,159,197,238],"cutting":[24],"down":[25],"complexity":[27,41],"by":[28,170,177,206,221],"skipping":[29],"insignificant":[30],"weights.":[31],"However,":[32],"current":[33],"strategies":[35],"remain":[36],"mostly":[37],"hardware-agnostic":[38],"and":[39,117,126,147,173,217,244],"reduction":[42],"does":[43],"not":[44],"always":[45],"translate":[46],"execution":[48,129],"efficiency.":[49],"In":[50],"this":[51],"work,":[52],"we":[53,81,119,164,201],"propose":[54],"a":[55,108,121],"hardware-guided":[56],"symbiotic":[57],"training":[58],"methodology":[59],"compact,":[61,242],"accurate,":[62,243],"execution-efficient":[64,245],"inference":[65,92,246],"It":[67],"is":[68],"based":[69],"on":[70,144,182,226],"our":[71,142],"observation":[72,95],"that":[73],"hardware":[74],"may":[75],"introduce":[76],"substantial":[77],"non-monotonic":[78],"behavior,":[79],"which":[80,104],"call":[82],"latency":[84,134,176,220],"hysteresis":[85,115],"effect,":[86],"when":[87],"evaluating":[88],"size":[90,205],"versus":[91],"latency.":[93],"This":[94],"raises":[96],"question":[97],"about":[98],"mainstream":[100],"smaller-dimension-is-better":[101],"strategy,":[103],"often":[105],"leads":[106],"sub-optimal":[109],"model":[110,124,204],"architecture.":[111],"Leveraging":[112],"hardware-impacted":[114],"effect":[116],"sparsity,":[118],"enable":[120],"symbiosis":[122],"of":[123,168],"compactness":[125],"accuracy":[127,190],"with":[128,241],"efficiency,":[130],"thus":[131],"reducing":[132],"LSTM":[133,156],"while":[135],"increasing":[136],"its":[137],"accuracy.":[138],"We":[139],"evaluated":[141],"approach":[143],"language":[145],"modeling":[146],"speech":[148],"recognition":[149],"applications.":[150],"Relative":[151],"traditional":[154],"stacked":[155],"architecture":[157,195],"obtained":[158,196],"Penn":[161],"Treebank":[162],"dataset,":[163,200],"reduce":[165,202],"number":[167],"parameters":[169],"18.0\u00d7":[171],"(30.5\u00d7)":[172],"measured":[174,218],"run-time":[175,219],"up":[178,222],"2.4\u00d7":[180],"(5.2\u00d7)":[181],"Nvidia":[183,227],"GPUs":[184,228],"(Intel":[185,229],"Xeon":[186,230],"CPUs)":[187],"without":[188],"any":[189],"degradation.":[191],"For":[192],"DeepSpeech2":[194],"AN4":[199],"7.0\u00d7":[207],"(19.4\u00d7),":[208],"word":[209],"error":[210],"rate":[211],"from":[212],"12.9%":[213],"9.9%":[215],"(10.4%),":[216],"1.7\u00d7":[224],"(2.4\u00d7)":[225],"CPUs).":[231],"Our":[232],"method":[233],"consistently":[234],"outperforms":[235],"prior":[236],"art":[237],"both":[239],"applications,":[240]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
