{"id":"https://openalex.org/W7138257596","doi":"https://doi.org/10.1609/aaai.v40i26.39296","title":"SPEED-Q: Staged Processing with Enhanced Distillation Towards Efficient Low-Bit On-Device VLM Quantization","display_name":"SPEED-Q: Staged Processing with Enhanced Distillation Towards Efficient Low-Bit On-Device VLM Quantization","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138257596","doi":"https://doi.org/10.1609/aaai.v40i26.39296"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i26.39296","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39296","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39296/43257","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39296/43257","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129647520","display_name":"Tianyu Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tianyu Guo","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129691415","display_name":"Shanwei Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shanwei Zhao","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050416778","display_name":"Shiai Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiai Zhu","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5129695156","display_name":"Chenguang Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chenguang Ma","raw_affiliation_strings":["Ant Group"],"affiliations":[{"raw_affiliation_string":"Ant Group","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5129647520"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60820896,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"26","first_page":"21486","last_page":"21494"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5491999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5491999983787537,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.14169999957084656,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.03830000013113022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.8495000004768372},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.6406999826431274},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5238999724388123},{"id":"https://openalex.org/keywords/ranging","display_name":"Ranging","score":0.3698999881744385},{"id":"https://openalex.org/keywords/microcontroller","display_name":"Microcontroller","score":0.2874000072479248},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.27730000019073486}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.8495000004768372},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6696000099182129},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.6406999826431274},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5238999724388123},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.3698999881744385},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34439998865127563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3303000032901764},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.289900004863739},{"id":"https://openalex.org/C173018170","wikidata":"https://www.wikidata.org/wiki/Q165678","display_name":"Microcontroller","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26840001344680786},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i26.39296","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39296","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39296/43257","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i26.39296","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39296","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39296/43257","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138257596.pdf","grobid_xml":"https://content.openalex.org/works/W7138257596.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deploying":[0],"Vision-Language":[1],"Models":[2],"(VLMs)":[3],"on":[4,54],"edge":[5,72],"devices":[6],"(e.g.,":[7],"smartphones":[8],"and":[9,16,36,110,157,166,208,217],"robots)":[10],"is":[11,136,173],"crucial":[12],"for":[13,57,70,88,178],"enabling":[14],"low-latency":[15],"privacy-preserving":[17],"intelligent":[18],"applications.":[19],"Given":[20],"the":[21,42,58,96,121,154,174],"resource":[22],"constraints":[23],"of":[24,44,169],"these":[25],"devices,":[26],"quantization":[27,53,92,105,150,168,203],"offers":[28],"a":[29,80,131,148],"promising":[30],"solution":[31],"by":[32],"improving":[33],"memory":[34],"efficiency":[35],"reducing":[37],"bandwidth":[38],"requirements,":[39],"thereby":[40],"facilitating":[41],"deployment":[43],"VLMs.":[45,171],"However,":[46],"existing":[47,202],"research":[48],"has":[49],"rarely":[50],"explored":[51],"aggressive":[52],"VLMs,":[55],"particularly":[56],"models":[59],"ranging":[60],"from":[61,120],"1B":[62],"to":[63,138,152,184,197],"2B":[64],"parameters,":[65],"which":[66],"are":[67],"more":[68],"suitable":[69],"resource-constrained":[71],"devices.":[73],"In":[74,129],"this":[75],"paper,":[76],"we":[77],"propose":[78,147],"SPEED-Q,":[79,130],"novel":[81],"Staged":[82],"Processing":[83],"with":[84],"EnhancEd":[85],"Distillation":[86],"framework":[87,176],"VLM":[89],"low-bit":[90,127],"weight-only":[91],"that":[93,193],"systematically":[94],"addresses":[95],"following":[97],"two":[98],"critical":[99],"obstacles:":[100],"(1)":[101],"significant":[102],"discrepancies":[103],"in":[104,114,126],"sensitivity":[106,133],"between":[107],"vision":[108],"(ViT)":[109],"language":[111],"(LLM)":[112],"components":[113],"VLMs;":[115],"(2)":[116],"training":[117,155],"instability":[118],"arising":[119],"reduced":[122],"numerical":[123],"precision":[124],"inherent":[125],"quantization.":[128],"staged":[132],"adaptive":[134],"mechanism":[135],"introduced":[137],"effectively":[139],"harmonize":[140],"performance":[141],"across":[142,189],"different":[143],"modalities.":[144],"We":[145],"further":[146],"distillation-enhanced":[149],"strategy":[151],"stabilize":[153],"process":[156],"reduce":[158],"data":[159],"dependence.":[160],"Together,":[161],"SPEED-Q":[162,172,194],"enables":[163],"accurate,":[164],"stable,":[165],"data-efficient":[167],"complex":[170],"first":[175],"tailored":[177],"quantizing":[179],"entire":[180],"small-scale":[181],"billion-parameter":[182],"VLMs":[183,213],"low":[185],"bits.":[186],"Extensive":[187],"experiments":[188],"multiple":[190],"benchmarks":[191],"demonstrate":[192],"achieves":[195],"up":[196],"6x":[198],"higher":[199],"accuracy":[200],"than":[201],"methods":[204],"under":[205,214],"2-bit":[206,216],"settings":[207],"consistently":[209],"outperforms":[210],"prior":[211],"on-device":[212],"both":[215],"4-bit":[218],"settings.":[219]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
