{"id":"https://openalex.org/W7165179650","doi":"https://doi.org/10.1145/3787109.3815213","title":"GUPrecision: Group-Wise Uniform Precision Accelerator for Depthwise Separable Convolution using Hardware-Algorithm Co-Design","display_name":"GUPrecision: Group-Wise Uniform Precision Accelerator for Depthwise Separable Convolution using Hardware-Algorithm Co-Design","publication_year":2026,"publication_date":"2026-06-18","ids":{"openalex":"https://openalex.org/W7165179650","doi":"https://doi.org/10.1145/3787109.3815213"},"language":null,"primary_location":{"id":"doi:10.1145/3787109.3815213","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3787109.3815213","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Great Lakes Symposium on VLSI 2026","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3787109.3815213","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101508336","display_name":"Yuehua Chen","orcid":"https://orcid.org/0000-0002-4646-4017"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Yi Chen","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany"],"raw_orcid":"https://orcid.org/0009-0007-2101-5235","affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043984277","display_name":"Jie Lou","orcid":"https://orcid.org/0000-0003-0380-8585"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jie Lou","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany"],"raw_orcid":"https://orcid.org/0000-0003-0380-8585","affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092378557","display_name":"Malte Wabnitz","orcid":"https://orcid.org/0009-0000-9122-3610"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Malte Wabnitz","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany"],"raw_orcid":"https://orcid.org/0009-0000-9122-3610","affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023628905","display_name":"Tobias Gemmeke","orcid":"https://orcid.org/0000-0003-1583-3411"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tobias Gemmeke","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany"],"raw_orcid":"https://orcid.org/0000-0003-1583-3411","affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I887968799"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.90906336,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"876","last_page":"882"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7843999862670898,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7843999862670898,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.02879999950528145,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.023900000378489494,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.7294999957084656},{"id":"https://openalex.org/keywords/multiplexing","display_name":"Multiplexing","score":0.5281000137329102},{"id":"https://openalex.org/keywords/separable-space","display_name":"Separable space","score":0.5182999968528748},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.4693000018596649},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4562999904155731},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.43130001425743103},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.4198000133037567},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.398499995470047},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.38670000433921814}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7294999957084656},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6459000110626221},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5651999711990356},{"id":"https://openalex.org/C19275194","wikidata":"https://www.wikidata.org/wiki/Q222903","display_name":"Multiplexing","level":2,"score":0.5281000137329102},{"id":"https://openalex.org/C70710897","wikidata":"https://www.wikidata.org/wiki/Q680081","display_name":"Separable space","level":2,"score":0.5182999968528748},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.4693000018596649},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4562999904155731},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.43130001425743103},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.42010000348091125},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.4198000133037567},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.398499995470047},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.38670000433921814},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3707999885082245},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.3465000092983246},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C41431624","wikidata":"https://www.wikidata.org/wiki/Q1053357","display_name":"Block size","level":3,"score":0.3386000096797943},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3328999876976013},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.322299987077713},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.30799999833106995},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.28200000524520874},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28130000829696655},{"id":"https://openalex.org/C7720571","wikidata":"https://www.wikidata.org/wiki/Q1136880","display_name":"Power of two","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.26750001311302185},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.2583000063896179},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.25209999084472656},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3787109.3815213","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3787109.3815213","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Great Lakes Symposium on VLSI 2026","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3787109.3815213","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3787109.3815213","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Great Lakes Symposium on VLSI 2026","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8895881175994873,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2625954420","https://openalex.org/W2963163009","https://openalex.org/W2963367920","https://openalex.org/W2982083293","https://openalex.org/W2987129023","https://openalex.org/W3034714646","https://openalex.org/W4319996342","https://openalex.org/W4366821684","https://openalex.org/W4382814440","https://openalex.org/W4388283422","https://openalex.org/W4388894005","https://openalex.org/W4390776943","https://openalex.org/W4396918326","https://openalex.org/W4401210315","https://openalex.org/W4401808781","https://openalex.org/W4404057199","https://openalex.org/W4408151495","https://openalex.org/W4408183245","https://openalex.org/W4408183360","https://openalex.org/W4411206241","https://openalex.org/W4411713446","https://openalex.org/W4411724833","https://openalex.org/W7134903314"],"related_works":[],"abstract_inverted_index":{"Quantization":[0],"and":[1,11,26,51,116,126,142,174],"pruning":[2,52,75],"are":[3,20],"effective":[4,156],"techniques":[5],"for":[6,22,45],"reducing":[7],"neural":[8],"network":[9,74],"size":[10],"improving":[12],"energy":[13,162],"efficiency.":[14],"Although":[15],"fixed":[16,92],"word":[17,111],"length":[18],"networks":[19,28],"well-suited":[21],"hardware":[23,33,79,83],"acceleration,":[24],"mixed-precision":[25,67,100],"pruned":[27],"still":[29],"suffer":[30],"from":[31],"efficient":[32,78],"support.":[34],"Depthwise":[35],"separable":[36],"convolution":[37],"(DSC)":[38],"has":[39],"become":[40],"a":[41,65,91,160,168],"key":[42],"building":[43],"block":[44],"resource-constrained":[46],"devices;":[47],"however,":[48],"applying":[49],"quantization":[50,69],"to":[53,109],"DSC":[54,136],"models":[55],"remains":[56],"challenging.":[57],"To":[58],"address":[59],"these":[60],"challenges,":[61],"we":[62],"propose":[63],"GUPrecision,":[64],"group-wise":[66],"uniform":[68],"framework":[70],"that":[71],"inherently":[72],"supports":[73],"while":[76],"enabling":[77],"realization.":[80],"GUPrecision":[81,121],"achieves":[82,159],"compatibility":[84],"by":[85],"dividing":[86],"channels":[87],"into":[88],"subgroups":[89],"with":[90,167],"total":[93],"bit":[94],"budget.":[95],"Within":[96],"each":[97],"group,":[98],"the":[99,103,123],"multipliers":[101],"in":[102],"PE":[104],"array":[105],"can":[106],"dynamically":[107],"adapt":[108],"varying":[110],"lengths":[112],"using":[113,129],"simple":[114],"shifting":[115],"multiplexing":[117],"operations.":[118],"We":[119],"evaluated":[120],"on":[122],"MobileNetV1":[124],"model":[125],"implemented":[127],"it":[128,158],"GlobalFoundries":[130],"22":[131],"nm":[132],"FDSOI":[133],"technology.":[134],"The":[135],"accelerator":[137],"operates":[138],"at":[139],"1":[140],"GHz":[141],"0.8":[143],"V":[144],"after":[145],"signoff,":[146],"occupying":[147],"an":[148,175],"area":[149,176],"of":[150,164,171,178],"0.71":[151],"mm2":[152],".":[153],"At":[154],"75%":[155],"sparsity,":[157],"peak":[161],"efficiency":[163,177],"17.4":[165],"TOPS/W,":[166],"corresponding":[169],"throughput":[170],"8136":[172],"GOPS":[173],"11459":[179],"GOPS/mm2.":[180]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-06-19T00:00:00"}
