{"id":"https://openalex.org/W4406650295","doi":"https://doi.org/10.1145/3714983.3714987","title":"AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration","display_name":"AWQ: Activation-aware Weight Quantization for On-Device LLM Compression and Acceleration","publication_year":2025,"publication_date":"2025-01-20","ids":{"openalex":"https://openalex.org/W4406650295","doi":"https://doi.org/10.1145/3714983.3714987"},"language":"en","primary_location":{"id":"doi:10.1145/3714983.3714987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3714983.3714987","pdf_url":null,"source":{"id":"https://openalex.org/S4210227886","display_name":"GetMobile Mobile Computing and Communications","issn_l":"2375-0529","issn":["2375-0529","2375-0537"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GetMobile: Mobile Computing and Communications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065001783","display_name":"Ji Lin","orcid":"https://orcid.org/0000-0001-6053-4344"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ji Lin","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103244882","display_name":"Jiaming Tang","orcid":"https://orcid.org/0009-0004-4186-6561"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiaming Tang","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101819029","display_name":"Haotian Tang","orcid":"https://orcid.org/0000-0001-6580-3881"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haotian Tang","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112039102","display_name":"Shang Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shang Yang","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009724487","display_name":"Guangxuan Xiao","orcid":"https://orcid.org/0000-0002-7182-9284"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guangxuan Xiao","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086688453","display_name":"Song Han","orcid":"https://orcid.org/0000-0001-7758-3679"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Song Han","raw_affiliation_strings":["MIT, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I4210110987"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065001783"],"corresponding_institution_ids":["https://openalex.org/I4210110987"],"apc_list":null,"apc_paid":null,"fwci":168.6958,"has_fulltext":false,"cited_by_count":150,"citation_normalized_percentile":{"value":0.99995951,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"28","issue":"4","first_page":"12","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11447","display_name":"Blind Source Separation Techniques","score":0.986299991607666,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11338","display_name":"Advancements in Photolithography Techniques","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5448650121688843},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.5441511869430542},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.4784541130065918},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4632059335708618},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.3449576497077942},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.18708348274230957},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.1591174602508545},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.10721400380134583},{"id":"https://openalex.org/keywords/classical-mechanics","display_name":"Classical mechanics","score":0.051212847232818604}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5448650121688843},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.5441511869430542},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.4784541130065918},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4632059335708618},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3449576497077942},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.18708348274230957},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.1591174602508545},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.10721400380134583},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.051212847232818604}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3714983.3714987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3714983.3714987","pdf_url":null,"source":{"id":"https://openalex.org/S4210227886","display_name":"GetMobile Mobile Computing and Communications","issn_l":"2375-0529","issn":["2375-0529","2375-0537"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"GetMobile: Mobile Computing and Communications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2916954108","https://openalex.org/W3111747337","https://openalex.org/W3159727696","https://openalex.org/W4292119927","https://openalex.org/W4307934016","https://openalex.org/W4309584731","https://openalex.org/W4309591680","https://openalex.org/W4312056202","https://openalex.org/W4384918448","https://openalex.org/W4385573671","https://openalex.org/W4389524393","https://openalex.org/W4402727885","https://openalex.org/W6760069825"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4404995717","https://openalex.org/W2016187641","https://openalex.org/W4404725684","https://openalex.org/W4246450666","https://openalex.org/W4388998267","https://openalex.org/W2898370298","https://openalex.org/W2137437058"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4],"transformed":[5],"numerous":[6],"AI":[7,144],"applications.":[8,151],"On-device":[9],"LLM":[10,63,139],"is":[11,66],"becoming":[12],"increasingly":[13],"important:":[14],"running":[15],"LLMs":[16],"locally":[17],"on":[18,78],"edge":[19,125],"devices":[20],"can":[21],"reduce":[22],"cloud":[23],"computing":[24],"costs":[25],"and":[26,35,54,73,110,120],"protect":[27],"users'":[28],"privacy.":[29],"However,":[30],"the":[31,36],"astronomical":[32],"model":[33,83,117],"size":[34,84,118],"limited":[37],"hardware":[38],"resources":[39],"pose":[40],"significant":[41],"deployment":[42],"challenges.":[43],"To":[44],"solve":[45],"these":[46],"issues,":[47],"we":[48],"propose":[49],"Activation-aware":[50],"Weight":[51],"Quantization":[52],"(AWQ)":[53],"TinyChat,":[55,88],"an":[56,89],"algorithm-system":[57],"full-stack":[58],"solution":[59,136],"for":[60],"efficient":[61],"on-device":[62,138],"deployment.":[64],"AWQ":[65],"a":[67,147],"novel":[68],"quantization":[69],"method":[70],"that":[71],"identifies":[72],"protects":[74],"salient":[75],"weights":[76],"based":[77],"activation":[79],"distribution,":[80],"significantly":[81],"reducing":[82],"while":[85],"preserving":[86],"performance.":[87],"optimized":[90],"inference":[91],"framework,":[92],"translates":[93],"AWQ's":[94],"theoretical":[95],"memory":[96],"savings":[97],"into":[98],"practical":[99],"speedups":[100],"through":[101],"techniques":[102],"such":[103],"as":[104],"on-the-fly":[105],"dequantization,":[106],"SIMD-aware":[107],"weight":[108],"packing,":[109],"kernel":[111],"fusion.":[112],"Together,":[113],"they":[114],"enable":[115],"4x":[116],"reduction":[119],"3-4x":[121],"acceleration":[122],"across":[123,146],"various":[124],"platforms,":[126],"from":[127],"high-end":[128],"desktop":[129],"GPUs":[130],"to":[131],"resource-constrained":[132],"IoT":[133],"devices.":[134],"This":[135],"democratizes":[137],"deployment,":[140],"offering":[141],"privacy-preserving,":[142],"low-latency":[143],"capabilities":[145],"wide":[148],"range":[149],"of":[150]},"counts_by_year":[{"year":2026,"cited_by_count":22},{"year":2025,"cited_by_count":122},{"year":2024,"cited_by_count":6}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
