{"id":"https://openalex.org/W4413278534","doi":"https://doi.org/10.1109/icfpt64416.2024.11113397","title":"Hardware/Software Co-Design of RISC-V Extensions for Accelerating Sparse DNNs on FPGAs","display_name":"Hardware/Software Co-Design of RISC-V Extensions for Accelerating Sparse DNNs on FPGAs","publication_year":2024,"publication_date":"2024-12-10","ids":{"openalex":"https://openalex.org/W4413278534","doi":"https://doi.org/10.1109/icfpt64416.2024.11113397"},"language":"en","primary_location":{"id":"doi:10.1109/icfpt64416.2024.11113397","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icfpt64416.2024.11113397","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Field Programmable Technology (ICFPT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101507575","display_name":"Muhammad Sabih","orcid":"https://orcid.org/0000-0003-2066-646X"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Muhammad Sabih","raw_affiliation_strings":["Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011733715","display_name":"Abrarul Karim","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Abrarul Karim","raw_affiliation_strings":["Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045533648","display_name":"Jakob Wittmann","orcid":null},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jakob Wittmann","raw_affiliation_strings":["Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072039340","display_name":"Frank Hannig","orcid":"https://orcid.org/0000-0003-3663-6484"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Frank Hannig","raw_affiliation_strings":["Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany","institution_ids":["https://openalex.org/I181369854"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076672029","display_name":"J\u00fcrgen Teich","orcid":"https://orcid.org/0000-0001-6285-5862"},"institutions":[{"id":"https://openalex.org/I181369854","display_name":"Friedrich-Alexander-Universit\u00e4t Erlangen-N\u00fcrnberg","ror":"https://ror.org/00f7hpc57","country_code":"DE","type":"education","lineage":["https://openalex.org/I181369854"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"J\u00fcrgen Teich","raw_affiliation_strings":["Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Friedrich-Alexander-Universit&#x00E4;t Erlangen-N&#x00FC;rnberg (FAU),Department of Computer Science,Germany","institution_ids":["https://openalex.org/I181369854"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101507575"],"corresponding_institution_ids":["https://openalex.org/I181369854"],"apc_list":null,"apc_paid":null,"fwci":1.5459,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.83745681,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"01","last_page":"09"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9853000044822693,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9746000170707703,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.75334632396698},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6969699859619141},{"id":"https://openalex.org/keywords/reduced-instruction-set-computing","display_name":"Reduced instruction set computing","score":0.5154538750648499},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5039793848991394},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.48442962765693665},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4803679883480072},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.4415930509567261},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.22000613808631897},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.21819671988487244}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75334632396698},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6969699859619141},{"id":"https://openalex.org/C126298526","wikidata":"https://www.wikidata.org/wiki/Q189376","display_name":"Reduced instruction set computing","level":3,"score":0.5154538750648499},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5039793848991394},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.48442962765693665},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4803679883480072},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.4415930509567261},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.22000613808631897},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.21819671988487244}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icfpt64416.2024.11113397","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icfpt64416.2024.11113397","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Field Programmable Technology (ICFPT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1506342804","https://openalex.org/W1861492603","https://openalex.org/W2076587863","https://openalex.org/W2949619037","https://openalex.org/W3090912412","https://openalex.org/W3114479342","https://openalex.org/W4220818878","https://openalex.org/W4244080578","https://openalex.org/W4311224386","https://openalex.org/W4322761281","https://openalex.org/W4379115981","https://openalex.org/W4380303214","https://openalex.org/W4386571432","https://openalex.org/W4392084966","https://openalex.org/W4401568359","https://openalex.org/W4401568656"],"related_works":["https://openalex.org/W2096844293","https://openalex.org/W2363944576","https://openalex.org/W2351041855","https://openalex.org/W2570254841","https://openalex.org/W1967938402","https://openalex.org/W2386041993","https://openalex.org/W1608572506","https://openalex.org/W2035206467","https://openalex.org/W4413068361","https://openalex.org/W2512308948"],"abstract_inverted_index":{"The":[0,123],"customizability":[1],"of":[2,69,94,99,110,158,167,172,188,192,197,207,219],"RISC-V":[3,55],"makes":[4],"it":[5],"an":[6],"attractive":[7],"choice":[8],"for":[9,57],"accelerating":[10,58,70],"deep":[11],"neural":[12],"networks":[13],"(DNNs).":[14],"It":[15],"can":[16,164,184],"be":[17],"achieved":[18],"through":[19],"instruction":[20],"set":[21],"extensions":[22,56],"and":[23,44,63,72,101,160,174,239],"corresponding":[24],"custom":[25,125],"functional":[26,126],"units.":[27],"Yet,":[28],"efficiently":[29],"exploiting":[30],"these":[31],"opportunities":[32],"requires":[33],"a":[34,104,108,140,170,180,195,204],"hardware/software":[35],"co-design":[36],"approach":[37],"in":[38,107,119],"which":[39],"the":[40,67,95,115,120,153,213,217],"DNN":[41,59,111],"model,":[42],"software,":[43],"hardware":[45],"are":[46],"designed":[47],"together.":[48],"In":[49],"this":[50,129],"paper,":[51],"we":[52,91,138],"propose":[53,139,179],"novel":[54,79],"models":[60],"containing":[61],"semi-structured":[62,89,161],"unstructured":[64,73,136,159],"sparsity.":[65],"While":[66],"idea":[68],"structured":[71],"pruning":[74,162],"is":[75],"not":[76],"new,":[77],"our":[78,227],"design":[80,182],"offers":[81],"various":[82],"advantages":[83],"over":[84],"other":[85],"designs.":[86],"To":[87,134],"exploit":[88,135],"sparsity,":[90,137],"take":[92],"advantage":[93],"finegrained":[96],"(bit-level)":[97],"configurability":[98],"FPGAs":[100],"suggest":[102],"reserving":[103],"few":[105],"bits":[106],"block":[109],"weights":[112],"to":[113,131,169,194],"encode":[114],"information":[116,130],"about":[117],"sparsity":[118],"succeeding":[121],"blocks.":[122],"proposed":[124],"unit":[127,145],"utilizes":[128],"skip":[132],"computations.":[133],"variable":[141],"cycle":[142],"sequential":[143],"multiply-and-accumulate":[144],"that":[146,183,212],"performs":[147],"only":[148],"as":[149,152,234],"many":[150],"multiplications":[151],"non-zero":[154],"weights.":[155],"Our":[156,201],"implementation":[157],"accelerators":[163],"provide":[165],"speedups":[166,191],"up":[168,193],"factor":[171,196],"3":[173],"4,":[175],"respectively.":[176],"We":[177,225],"then":[178],"combined":[181],"accelerate":[185],"both":[186],"types":[187],"sparsities,":[189],"providing":[190],"<tex":[198],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[199],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$\\mathbf{5}$</tex>.":[200],"designs":[202,228],"consume":[203],"small":[205,223],"amount":[206],"additional":[208],"FPGA":[209],"resources":[210],"such":[211,233],"resulting":[214],"co-designs":[215],"enable":[216],"acceleration":[218],"DNNs":[220],"even":[221],"on":[222,229],"FPGAs.":[224],"benchmark":[226],"standard":[230],"TinyML":[231],"applications":[232],"keyword":[235],"spotting,":[236],"image":[237],"classification,":[238],"person":[240],"detection.":[241]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6}],"updated_date":"2026-05-17T08:19:37.847499","created_date":"2025-10-10T00:00:00"}
