{"id":"https://openalex.org/W4415708112","doi":"https://doi.org/10.1109/icme59968.2025.11209864","title":"Zero-shot Quantization of Vision Transformers: Leveraging Multi-model Ensembles and Attention Mixup","display_name":"Zero-shot Quantization of Vision Transformers: Leveraging Multi-model Ensembles and Attention Mixup","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708112","doi":"https://doi.org/10.1109/icme59968.2025.11209864"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209864","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209864","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109330830","display_name":"Yao Li","orcid":"https://orcid.org/0000-0001-6736-355X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yao Li","raw_affiliation_strings":["Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School"],"affiliations":[{"raw_affiliation_string":"Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012238287","display_name":"Xinrui Chen","orcid":"https://orcid.org/0000-0003-0910-4650"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinrui Chen","raw_affiliation_strings":["Tsinghua University,Shenzhen International Graduate School"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Shenzhen International Graduate School","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043276848","display_name":"Zhuozhen Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuozhen Yu","raw_affiliation_strings":["Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School"],"affiliations":[{"raw_affiliation_string":"Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006776719","display_name":"Shunzhou Wang","orcid":"https://orcid.org/0000-0001-5401-043X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shunzhou Wang","raw_affiliation_strings":["Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School"],"affiliations":[{"raw_affiliation_string":"Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050309466","display_name":"Wei Gao","orcid":"https://orcid.org/0000-0001-7429-5495"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Gao","raw_affiliation_strings":["Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School"],"affiliations":[{"raw_affiliation_string":"Peking University,Guangdong Provincial Key Laboratory of Ultra High Definition Immersive Media Technology, School of Electronic and Computer Engineering, Shenzhen Graduate School","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5109330830"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30985873,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.32749998569488525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.32749998569488525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.15700000524520874,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.09600000083446503,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6248000264167786},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5489000082015991},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5440999865531921},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5374000072479248},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.47510001063346863},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.44510000944137573},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4083999991416931}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.777400016784668},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6399999856948853},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6248000264167786},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5489000082015991},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5440999865531921},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5374000072479248},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5157999992370605},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.47510001063346863},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4083999991416931},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3301999866962433},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3057999908924103},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2651999890804291},{"id":"https://openalex.org/C197352929","wikidata":"https://www.wikidata.org/wiki/Q1074074","display_name":"Inductive bias","level":4,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209864","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209864","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2963163009","https://openalex.org/W2992308087","https://openalex.org/W3004061291","https://openalex.org/W3034940165","https://openalex.org/W3138516171","https://openalex.org/W3175730059","https://openalex.org/W3204480651","https://openalex.org/W4285601701","https://openalex.org/W4309125142","https://openalex.org/W4313047887","https://openalex.org/W4313069943","https://openalex.org/W4375839990","https://openalex.org/W4385819842","https://openalex.org/W4386066374","https://openalex.org/W4386075867","https://openalex.org/W4390873361","https://openalex.org/W4390873673"],"related_works":[],"abstract_inverted_index":{"Zero-shot":[0],"quantization":[1],"(ZSQ)":[2],"shows":[3],"promise":[4],"in":[5,12,118,141],"compressing":[6],"and":[7,63,71,92,138,167,210],"accelerating":[8],"deep":[9],"neural":[10,104],"networks":[11,105],"scenarios":[13],"where":[14],"the":[15,37,43,52,78,81,95,99,119,122,130,159,191,198,214],"original":[16],"training":[17,115],"data":[18,70,188],"is":[19,221],"inaccessible.":[20],"Recently,":[21],"ZSQ":[22,58,142],"for":[23,33,143,186],"vision":[24],"transformers":[25],"(ViTs)":[26],"has":[27,204],"been":[28,205],"proposed":[29],"to":[30,73,94,157,170,197,223,236],"synthesize":[31],"samples":[32,54,83],"ViT":[34,88],"network":[35,89],"quantization,":[36],"quality":[38],"of":[39,45,124,151,202],"which":[40,66],"significantly":[41],"impacts":[42],"performance":[44,75],"quantized":[46,222],"models.":[47],"Nonetheless,":[48],"we":[49,133],"observe":[50],"that":[51,175],"synthetic":[53,82,173],"produced":[55],"by":[56,85],"current":[57],"techniques":[59],"exhibit":[60],"severe":[61],"bias":[62],"insufficient":[64],"optimization,":[65],"deviate":[67],"from":[68],"real":[69],"lead":[72],"substantial":[74],"declines.":[76],"On":[77,98],"one":[79],"hand,":[80,101],"generated":[84],"a":[86,181,228],"single":[87],"are":[90],"inaccurate":[91],"biased":[93],"specific":[96],"model.":[97],"other":[100],"unlike":[102],"convolutional":[103],"(CNNs)":[106],"with":[107],"BatchNorm":[108,168],"layers,":[109],"ViTs":[110,144],"do":[111],"not":[112],"store":[113],"any":[114],"set":[116],"statistics":[117,169],"networks,":[120],"hindering":[121],"generation":[123],"high-quality":[125],"calibration":[126],"samples.":[127],"To":[128],"address":[129],"above":[131],"issues,":[132],"propose":[134],"leveraging":[135],"Multi-model":[136],"Ensembles":[137],"Attention":[139],"Mixup":[140],"(MMA-ViT).":[145],"Specifically,":[146],"MMA-ViT":[147,179,203],"employs":[148],"an":[149],"ensemble":[150],"diverse":[152],"pre-trained":[153],"proxy":[154],"CNN":[155],"models":[156],"narrow":[158],"sample":[160,192],"synthesizing":[161],"space,":[162],"utilizing":[163],"their":[164],"predictive":[165],"capabilities":[166],"generate":[171],"exact":[172],"images":[174],"enhance":[176],"generality.":[177],"Additionally,":[178],"integrates":[180],"unique":[182],"attention-driven":[183],"mixup":[184],"technique":[185],"accurate":[187],"augmentation":[189],"during":[190],"synthesis":[193],"process,":[194],"avoiding":[195],"over-fitting":[196],"networks.":[199],"The":[200],"efficacy":[201],"demonstrated":[206],"through":[207],"extensive":[208],"experiments":[209],"ablation":[211],"studies":[212],"on":[213,233],"ImageNet":[215,234],"dataset.":[216],"For":[217],"example,":[218],"when":[219],"Swin-B":[220],"W3/A4,":[224],"our":[225],"method":[226],"achieves":[227],"11.89%":[229],"top-1":[230],"accuracy":[231],"increase":[232],"compared":[235],"state-of-the-art":[237],"methods.":[238]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-30T00:00:00"}
