{"id":"https://openalex.org/W4417169690","doi":"https://doi.org/10.1109/icecs66544.2025.11270611","title":"Energy-Efficient CNN Acceleration with MSDF Digit-Serial Arithmetic on FPGA","display_name":"Energy-Efficient CNN Acceleration with MSDF Digit-Serial Arithmetic on FPGA","publication_year":2025,"publication_date":"2025-11-17","ids":{"openalex":"https://openalex.org/W4417169690","doi":"https://doi.org/10.1109/icecs66544.2025.11270611"},"language":null,"primary_location":{"id":"doi:10.1109/icecs66544.2025.11270611","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icecs66544.2025.11270611","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 32nd IEEE International Conference on Electronics, Circuits and Systems (ICECS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075464273","display_name":"Muhammad Usman","orcid":"https://orcid.org/0000-0002-3393-5211"},"institutions":[{"id":"https://openalex.org/I120163777","display_name":"Regensburg University of Applied Sciences","ror":"https://ror.org/04b9vrm74","country_code":"DE","type":"education","lineage":["https://openalex.org/I120163777"]},{"id":"https://openalex.org/I60668342","display_name":"University of Regensburg","ror":"https://ror.org/01eezs655","country_code":"DE","type":"education","lineage":["https://openalex.org/I60668342"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Muhammad Usman","raw_affiliation_strings":["University of Regensburg,Faculty of Informatics and Data Science,Regensburg,Germany,93053"],"affiliations":[{"raw_affiliation_string":"University of Regensburg,Faculty of Informatics and Data Science,Regensburg,Germany,93053","institution_ids":["https://openalex.org/I60668342","https://openalex.org/I120163777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049207550","display_name":"Yousef Sadegheih","orcid":"https://orcid.org/0009-0003-1766-5519"},"institutions":[{"id":"https://openalex.org/I60668342","display_name":"University of Regensburg","ror":"https://ror.org/01eezs655","country_code":"DE","type":"education","lineage":["https://openalex.org/I60668342"]},{"id":"https://openalex.org/I120163777","display_name":"Regensburg University of Applied Sciences","ror":"https://ror.org/04b9vrm74","country_code":"DE","type":"education","lineage":["https://openalex.org/I120163777"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Yousef Sadegheih","raw_affiliation_strings":["University of Regensburg,Faculty of Informatics and Data Science,Regensburg,Germany,93053"],"affiliations":[{"raw_affiliation_string":"University of Regensburg,Faculty of Informatics and Data Science,Regensburg,Germany,93053","institution_ids":["https://openalex.org/I60668342","https://openalex.org/I120163777"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064747056","display_name":"Dorit Merhof","orcid":"https://orcid.org/0000-0002-1672-2185"},"institutions":[{"id":"https://openalex.org/I120163777","display_name":"Regensburg University of Applied Sciences","ror":"https://ror.org/04b9vrm74","country_code":"DE","type":"education","lineage":["https://openalex.org/I120163777"]},{"id":"https://openalex.org/I60668342","display_name":"University of Regensburg","ror":"https://ror.org/01eezs655","country_code":"DE","type":"education","lineage":["https://openalex.org/I60668342"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dorit Merhof","raw_affiliation_strings":["University of Regensburg,Faculty of Informatics and Data Science,Regensburg,Germany,93053"],"affiliations":[{"raw_affiliation_string":"University of Regensburg,Faculty of Informatics and Data Science,Regensburg,Germany,93053","institution_ids":["https://openalex.org/I60668342","https://openalex.org/I120163777"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075464273"],"corresponding_institution_ids":["https://openalex.org/I120163777","https://openalex.org/I60668342"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.43039901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.39169999957084656,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.39169999957084656,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.14589999616146088,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.08139999955892563,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.7107999920845032},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.7106999754905701},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.4927999973297119},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4390999972820282},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.424699991941452},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.42410001158714294},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.3756999969482422},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.375}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7296000123023987},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.7107999920845032},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.7106999754905701},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.6134999990463257},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.4927999973297119},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4390999972820282},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.424699991941452},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.42410001158714294},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3917999863624573},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.375},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.33000001311302185},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.3181000053882599},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.31369999051094055},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28540000319480896},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.28040000796318054},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2711000144481659},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icecs66544.2025.11270611","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icecs66544.2025.11270611","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 32nd IEEE International Conference on Electronics, Circuits and Systems (ICECS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2056767134","https://openalex.org/W2094756095","https://openalex.org/W2895531329","https://openalex.org/W4205313707","https://openalex.org/W4210396250","https://openalex.org/W4282601509","https://openalex.org/W4366428240","https://openalex.org/W4393241340","https://openalex.org/W4393373011","https://openalex.org/W4396858724","https://openalex.org/W4401725766","https://openalex.org/W4402082638","https://openalex.org/W4404563156","https://openalex.org/W4410840562"],"related_works":[],"abstract_inverted_index":{"This":[0,41],"paper":[1],"presents":[2],"an":[3,156],"energy-efficient":[4],"FPGA-based":[5,151],"hardware":[6,28],"accelerator":[7,152],"for":[8,172,200],"the":[9,13,37,85,95,132,150,193,196],"convolutional":[10],"layers":[11],"of":[12,81,98,158,195],"U-Net":[14,136],"architecture":[15,71],"used":[16],"in":[17,44,102,117,182,205],"image":[18],"segmentation.":[19],"While":[20],"digit-serial":[21],"arithmetic\u2014particularly":[22],"most-significant-digit-first":[23],"(MSDF)":[24],"techniques\u2014offers":[25],"a":[26,67,77,88,138,144,148],"compact":[27],"footprint,":[29],"it":[30],"suffers":[31],"from":[32],"initial":[33],"latency":[34,91,97],"before":[35],"producing":[36],"first":[38],"output":[39],"digit.":[40],"delay":[42],"accumulates":[43],"cascaded":[45,100],"operations":[46,75],"like":[47],"multiplication":[48],"followed":[49],"by":[50],"addition,":[51],"where":[52],"each":[53],"unit":[54],"introduces":[55,87],"its":[56],"own":[57],"startup":[58],"overhead.":[59],"To":[60],"overcome":[61],"this,":[62],"we":[63],"propose":[64],"to":[65,112,155,165,169,186],"use":[66],"merged":[68,197],"multiply-add":[69],"(MMA)":[70],"that":[72],"fuses":[73],"these":[74],"into":[76],"unified":[78],"pipeline.":[79],"Instead":[80],"incurring":[82],"separate":[83],"delays,":[84],"MMA":[86,108],"single,":[89],"streamlined":[90],"per":[92],"iteration\u2014shorter":[93],"than":[94,123,147],"combined":[96],"conventional":[99,128],"units\u2014resulting":[101],"enhanced":[103],"throughput":[104],"and":[105,127,208],"efficiency.":[106],"The":[107,175],"units":[109],"are":[110],"designed":[111],"process":[113],"spatial":[114],"input":[115],"depths":[116],"parallel,":[118],"achieving":[119],"significantly":[120],"higher":[121,160],"performance":[122],"both":[124],"standalone":[125],"MSDF-based":[126,187],"designs.":[129],"We":[130],"evaluate":[131],"proposed":[133],"design":[134,176],"using":[135],"as":[137],"target":[139],"application.":[140],"Despite":[141],"operating":[142],"at":[143],"lower":[145],"frequency":[146],"CPU,":[149],"achieves":[153],"up":[154,164],"order":[157],"magnitude":[159],"energy":[161,183],"efficiency,":[162],"delivering":[163],"15.14":[166],"GOPS/W":[167,171],"compared":[168,185],"1.93":[170],"CPU-based":[173],"inference.":[174],"also":[177],"shows":[178],"approximately":[179],"9\u00d7":[180],"reduction":[181],"consumption":[184],"FPGA":[188],"implementations.":[189],"These":[190],"results":[191],"highlight":[192],"efficacy":[194],"arithmetic":[198],"approach":[199],"resource-constrained,":[201],"latency-sensitive":[202],"edge":[203],"applications":[204],"medical":[206],"imaging":[207],"computer":[209],"vision.":[210]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-09T00:00:00"}
