{"id":"https://openalex.org/W4411574893","doi":"https://doi.org/10.32604/cmc.2025.063723","title":"Low-Complexity Hardware Architecture for Batch Normalization of CNN Training Accelerator","display_name":"Low-Complexity Hardware Architecture for Batch Normalization of CNN Training Accelerator","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4411574893","doi":"https://doi.org/10.32604/cmc.2025.063723"},"language":"en","primary_location":{"id":"doi:10.32604/cmc.2025.063723","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.063723","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.32604/cmc.2025.063723","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116901402","display_name":"Go-Eun Woo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Go-Eun Woo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070712144","display_name":"Sang-Bo Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang-Bo Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042714912","display_name":"Gi-Tae Park","orcid":"https://orcid.org/0000-0003-0469-4030"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gi-Tae Park","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007771475","display_name":"Muhammad Junaid","orcid":"https://orcid.org/0000-0003-0500-904X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhammad Junaid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032247984","display_name":"HyungWon Kim","orcid":"https://orcid.org/0000-0003-2602-2075"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyung-Won Kim","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5116901402"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.8599,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.91533585,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"84","issue":"2","first_page":"3241","last_page":"3257"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9391000270843506,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7999705672264099},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6548839807510376},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5219087600708008},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.515475332736969},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4221653938293457},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3907705247402191},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.328849732875824},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.0623684823513031}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7999705672264099},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6548839807510376},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5219087600708008},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.515475332736969},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4221653938293457},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3907705247402191},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.328849732875824},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0623684823513031},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.32604/cmc.2025.063723","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.063723","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.32604/cmc.2025.063723","is_oa":true,"landing_page_url":"https://doi.org/10.32604/cmc.2025.063723","pdf_url":null,"source":{"id":"https://openalex.org/S4210191605","display_name":"Computers, materials & continua/Computers, materials & continua (Print)","issn_l":"1546-2218","issn":["1546-2218","1546-2226"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers, Materials &amp; Continua","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1498436455","https://openalex.org/W2069143585","https://openalex.org/W2789027062","https://openalex.org/W2919115771","https://openalex.org/W2922073769","https://openalex.org/W3002842489","https://openalex.org/W3007895486","https://openalex.org/W4362689902","https://openalex.org/W4393219433"],"related_works":["https://openalex.org/W2591697403","https://openalex.org/W2944728705","https://openalex.org/W2904022177","https://openalex.org/W2359348847","https://openalex.org/W3011538607","https://openalex.org/W4294432981","https://openalex.org/W4321441197","https://openalex.org/W2953716828","https://openalex.org/W2469820710","https://openalex.org/W2038503502"],"abstract_inverted_index":{"On-device":[0],"Artificial":[1],"Intelligence":[2],"(AI)":[3],"accelerators":[4],"capable":[5],"of":[6,76,269],"not":[7],"only":[8],"inference":[9],"but":[10,46],"also":[11],"training":[12,41,74,97,258,282],"neural":[13,43,220],"network":[14],"models":[15],"are":[16],"in":[17,20,49,60,174],"increasing":[18],"demand":[19],"the":[21,73,160,224],"industrial":[22],"AI":[23],"field,":[24],"where":[25],"frequent":[26,32],"retraining":[27],"is":[28,38,273],"crucial":[29],"due":[30,55],"to":[31,40,56,183],"production":[33],"changes.":[34],"Batch":[35],"normalization":[36,168,193,272],"(BN)":[37],"fundamental":[39],"convolutional":[42],"networks":[44],"(CNNs),":[45],"its":[47],"implementation":[48,268],"compact":[50],"accelerator":[51,69,94,169],"chips":[52],"remains":[53],"challenging":[54],"computational":[57,83,102,286],"complexity,":[58],"particularly":[59],"calculating":[61],"statistical":[62],"parameters":[63],"and":[64,116,130,149,177,228,235,255,287],"gradients":[65],"across":[66,114],"mini-batches.":[67],"Existing":[68],"architectures":[70],"either":[71],"compromise":[72],"accuracy":[75,98,197],"CNNs":[77],"through":[78,104],"approximations":[79],"or":[80],"require":[81],"substantial":[82],"resources,":[84],"limiting":[85],"their":[86],"practical":[87,279],"deployment.":[88],"We":[89],"present":[90],"a":[91,139,171,184,219],"hardware-optimized":[92],"BN":[93,249],"that":[95,246,265],"maintains":[96],"while":[99,195],"significantly":[100,284],"reducing":[101],"overhead":[103],"three":[105],"novel":[106],"techniques:":[107],"(1)":[108],"resource-sharing":[109],"for":[110,122,133,259],"efficient":[111,266],"resource":[112],"utilization":[113],"forward":[115],"backward":[117],"passes,":[118],"(2)":[119],"interleaved":[120],"buffering":[121],"reduced":[123,285],"dynamic":[124],"random-access":[125],"memory":[126],"(DRAM)":[127],"access":[128],"latencies,":[129],"(3)":[131],"zero-skipping":[132],"minimal":[134],"gradient":[135],"computation.":[136],"Implemented":[137],"on":[138,159],"VCU118":[140],"Field":[141],"Programmable":[142],"Gate":[143],"Array":[144],"(FPGA)":[145],"at":[146,204,210],"100":[147],"MHz":[148],"validated":[150],"using":[151],"You":[152],"Only":[153],"Look":[154],"Once":[155],"version":[156],"2-tiny":[157],"(YOLOv2-tiny)":[158],"PASCAL":[161],"Visual":[162],"Object":[163],"Classes":[164],"(VOC)":[165],"dataset,":[166],"our":[167,247],"achieves":[170],"72%":[172],"reduction":[173],"processing":[175,221],"time":[176],"83%":[178],"lower":[179],"power":[180,288],"consumption":[181],"compared":[182],"2.4":[185],"GHz":[186],"Intel":[187],"Central":[188],"Processing":[189],"Unit":[190],"(CPU)":[191],"software":[192],"implementation,":[194],"maintaining":[196],"(0.51%":[198],"mean":[199],"Average":[200],"Precision":[201],"(mAP)":[202],"drop":[203],"floating-point":[205,212],"32":[206],"bits":[207,214],"(FP32),":[208],"1.35%":[209],"brain":[211],"16":[213],"(bfloat16)).":[215],"When":[216],"integrated":[217],"into":[218],"unit":[222],"(NPU),":[223],"design":[225,251],"demonstrates":[226],"63%":[227],"97%":[229],"performance":[230],"improvements":[231],"over":[232],"AMD":[233],"CPU":[234],"Reduced":[236],"Instruction":[237],"Set":[238],"Computing-V":[239],"(RISC-V)":[240],"implementations,":[241],"respectively.":[242],"These":[243],"results":[244,263],"confirm":[245],"proposed":[248],"hardware":[250,267],"enables":[252],"efficient,":[253],"high-accuracy,":[254],"power-saving":[256],"on-device":[257,280],"modern":[260],"CNNs.":[261],"Our":[262],"demonstrate":[264],"standard":[270],"batch":[271],"achievable":[274],"without":[275],"sacrificing":[276],"accuracy,":[277],"enabling":[278],"CNN":[281],"with":[283],"requirements.":[289]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
