{"id":"https://openalex.org/W4387010866","doi":"https://doi.org/10.1109/tpami.2023.3319045","title":"Vertical Layering of Quantized Neural Networks for Heterogeneous Inference","display_name":"Vertical Layering of Quantized Neural Networks for Heterogeneous Inference","publication_year":2023,"publication_date":"2023-09-25","ids":{"openalex":"https://openalex.org/W4387010866","doi":"https://doi.org/10.1109/tpami.2023.3319045","pmid":"https://pubmed.ncbi.nlm.nih.gov/37747868"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2023.3319045","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2023.3319045","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101718745","display_name":"Wu Hai","orcid":"https://orcid.org/0000-0002-2921-9132"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hai Wu","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-2921-9132","affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008977217","display_name":"Ruifei He","orcid":"https://orcid.org/0009-0009-6161-005X"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ruifei He","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0009-6161-005X","affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054734725","display_name":"Haoru Tan","orcid":"https://orcid.org/0009-0001-6721-2468"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Haoru Tan","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0001-6721-2468","affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102498323","display_name":"Xiaojuan Qi","orcid":"https://orcid.org/0000-0002-4285-1626"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xiaojuan Qi","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-4285-1626","affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007131492","display_name":"Kaibin Huang","orcid":"https://orcid.org/0000-0001-8773-4629"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Kaibin Huang","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-8773-4629","affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, The University of Hong Kong, Pok Fu Lam, Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4491,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.64205397,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"45","issue":"12","first_page":"15964","last_page":"15978"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.770474910736084},{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.7369146943092346},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7336786389350891},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6635533571243286},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6168616414070129},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.48367056250572205},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.45230674743652344},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.43117761611938477},{"id":"https://openalex.org/keywords/layering","display_name":"Layering","score":0.41742777824401855},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40031591057777405},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3960323631763458},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.34893345832824707}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.770474910736084},{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.7369146943092346},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7336786389350891},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6635533571243286},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6168616414070129},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.48367056250572205},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45230674743652344},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.43117761611938477},{"id":"https://openalex.org/C176055353","wikidata":"https://www.wikidata.org/wiki/Q1402114","display_name":"Layering","level":2,"score":0.41742777824401855},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40031591057777405},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3960323631763458},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34893345832824707},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2023.3319045","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2023.3319045","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:37747868","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37747868","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2367108652","display_name":null,"funder_award_id":"2019B1515130003","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":96,"referenced_works":["https://openalex.org/W1037408538","https://openalex.org/W1686810756","https://openalex.org/W1724438581","https://openalex.org/W1821462560","https://openalex.org/W1902934009","https://openalex.org/W2100717205","https://openalex.org/W2117539524","https://openalex.org/W2119144962","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2469490737","https://openalex.org/W2524428287","https://openalex.org/W2531315337","https://openalex.org/W2560017826","https://openalex.org/W2586654419","https://openalex.org/W2739542029","https://openalex.org/W2786771851","https://openalex.org/W2806990599","https://openalex.org/W2884150179","https://openalex.org/W2905741102","https://openalex.org/W2916954108","https://openalex.org/W2950673314","https://openalex.org/W2962746461","https://openalex.org/W2962761403","https://openalex.org/W2962944050","https://openalex.org/W2963163009","https://openalex.org/W2963263347","https://openalex.org/W2963393494","https://openalex.org/W2963480671","https://openalex.org/W2963723401","https://openalex.org/W2964137095","https://openalex.org/W2970601456","https://openalex.org/W2972343638","https://openalex.org/W2981751377","https://openalex.org/W2982041622","https://openalex.org/W2982479999","https://openalex.org/W2990953743","https://openalex.org/W2994749257","https://openalex.org/W3004061291","https://openalex.org/W3034644181","https://openalex.org/W3034764953","https://openalex.org/W3035078980","https://openalex.org/W3035282660","https://openalex.org/W3035332806","https://openalex.org/W3089015099","https://openalex.org/W3096533519","https://openalex.org/W3108835732","https://openalex.org/W3118608800","https://openalex.org/W3128299679","https://openalex.org/W3154998900","https://openalex.org/W3165976180","https://openalex.org/W3166874749","https://openalex.org/W3171689473","https://openalex.org/W3183118997","https://openalex.org/W3202442802","https://openalex.org/W3204296682","https://openalex.org/W3204647170","https://openalex.org/W3204960814","https://openalex.org/W4226217245","https://openalex.org/W4287824105","https://openalex.org/W4288334594","https://openalex.org/W4292864963","https://openalex.org/W4295312788","https://openalex.org/W4306809345","https://openalex.org/W6637373629","https://openalex.org/W6637709462","https://openalex.org/W6638523607","https://openalex.org/W6639703010","https://openalex.org/W6677580257","https://openalex.org/W6684191040","https://openalex.org/W6713132643","https://openalex.org/W6720242923","https://openalex.org/W6726497184","https://openalex.org/W6727208969","https://openalex.org/W6730047919","https://openalex.org/W6741753902","https://openalex.org/W6745722055","https://openalex.org/W6748224102","https://openalex.org/W6751913510","https://openalex.org/W6754005058","https://openalex.org/W6757036269","https://openalex.org/W6760069825","https://openalex.org/W6761158446","https://openalex.org/W6763724728","https://openalex.org/W6766225098","https://openalex.org/W6766978945","https://openalex.org/W6767064347","https://openalex.org/W6767298317","https://openalex.org/W6770698715","https://openalex.org/W6772326514","https://openalex.org/W6779405248","https://openalex.org/W6779959628","https://openalex.org/W6786271619","https://openalex.org/W6787972765","https://openalex.org/W6795991173","https://openalex.org/W6799009007"],"related_works":["https://openalex.org/W2364255229","https://openalex.org/W2053119319","https://openalex.org/W3189260464","https://openalex.org/W2366451163","https://openalex.org/W2334616087","https://openalex.org/W3183118997","https://openalex.org/W3214410901","https://openalex.org/W3204400881","https://openalex.org/W3204296682","https://openalex.org/W2917767146"],"abstract_inverted_index":{"Although":[0],"considerable":[1,38],"progress":[2],"has":[3],"been":[4],"obtained":[5,107,123],"in":[6,40,245],"neural":[7,55,98],"network":[8,56,99],"quantization":[9,103,126],"for":[10,32,58,152],"efficient":[11],"inference,":[12],"existing":[13,125],"methods":[14,127],"are":[15,134,243],"not":[16],"scalable":[17],"to":[18,26,89,113,136,169,194,269],"heterogeneous":[19],"devices":[20],"as":[21,70,217,263],"one":[22,33,253],"dedicated":[23],"model":[24,41,174,191],"needs":[25],"be":[27,106,180],"trained,":[28,193],"transmitted,":[29],"and":[30,43,208,228,238,254,258],"stored":[31],"specific":[34,271],"hardware":[35],"setting,":[36],"incurring":[37],"costs":[39],"training":[42,149],"maintenance.":[44],"In":[45],"this":[46,141],"paper,":[47],"we":[48,118,143],"study":[49],"a":[50,64,71,97,145,160,196,251],"new":[51],"vertical-layered":[52,137,155,197,236],"representation":[53,237],"of":[54,73,186,265],"weights":[57,69,175,203],"encapsulating":[59],"all":[60,187],"quantized":[61,202,248,266],"models":[62,122,267],"into":[63,250],"single":[65,252],"one.":[66],"It":[67],"represents":[68],"group":[72],"bits":[74,92],"(i.e.,":[75,93],"vertical":[76],"layers)":[77],"organized":[78],"from":[79],"the":[80,86,114,165,171,184,190,199,205,213,234],"most":[81],"significant":[82,91],"bit":[83,210],"(also":[84],"called":[85],"basic":[87,115,206],"layer)":[88],"less":[90],"enhance":[94,111,219],"layers).":[95],"Hence,":[96],"with":[100,124,164],"an":[101,218],"arbitrary":[102],"precision":[104],"can":[105,179],"by":[108],"adding":[109],"corresponding":[110],"layers":[112],"layer.":[116,220],"However,":[117],"empirically":[119],"find":[120],"that":[121,177,233,264],"suffer":[128],"severe":[129],"performance":[130,185,262],"degradation":[131],"if":[132],"they":[133,178],"adapted":[135],"weight":[138],"representation.":[139],"To":[140],"end,":[142],"propose":[144],"simple":[146],"once":[147,240],"quantization-aware":[148],"(QAT)":[150],"scheme":[151,242],"obtaining":[153],"high-performance":[154],"models.":[156],"Our":[157,221],"design":[158,222],"incorporates":[159],"cascade":[161],"downsampling":[162,214],"mechanism":[163],"multi-objective":[166],"optimization":[167],"employed":[168],"train":[170],"shared":[172],"source":[173],"such":[176],"updated":[181],"simultaneously,":[182],"considering":[183],"networks.":[188],"After":[189],"is":[192,223],"construct":[195],"network,":[198],"lowest":[200],"bit-width":[201],"become":[204],"layer,":[207],"every":[209],"dropped":[211],"along":[212],"process":[215],"act":[216],"extensively":[224],"evaluated":[225],"on":[226],"CIFAR-100":[227],"ImageNet":[229],"datasets.":[230],"Experiments":[231],"show":[232],"proposed":[235],"developed":[239],"QAT":[241],"effective":[244],"embodying":[246],"multiple":[247],"networks":[249],"allow":[255],"one-time":[256],"training,":[257],"it":[259],"delivers":[260],"comparable":[261],"tailored":[268],"any":[270],"bit-width.":[272]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
