{"id":"https://openalex.org/W4412944360","doi":"https://doi.org/10.18653/v1/2025.findings-acl.1386","title":"Automated Fine-Grained Mixture-of-Experts Quantization","display_name":"Automated Fine-Grained Mixture-of-Experts Quantization","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412944360","doi":"https://doi.org/10.18653/v1/2025.findings-acl.1386"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.1386","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1386","pdf_url":"https://aclanthology.org/2025.findings-acl.1386.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.1386.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zhanhao Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhanhao Xie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106172220","display_name":"Yuexiao Ma","orcid":"https://orcid.org/0009-0006-6807-6214"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuexiao Ma","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054226277","display_name":"Xiawu Zheng","orcid":"https://orcid.org/0000-0002-6855-5403"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiawu Zheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000389309","display_name":"Fei Chao","orcid":"https://orcid.org/0000-0002-6928-2638"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei Chao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047746131","display_name":"Wanchen Sui","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wanchen Sui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114911009","display_name":"Yong Li","orcid":"https://orcid.org/0000-0002-2281-5652"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yong Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101393907","display_name":"Shen Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5016080094","display_name":"Rongrong Ji","orcid":"https://orcid.org/0000-0001-9163-2932"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rongrong Ji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.5175,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93093807,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"27024","last_page":"27037"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9419000148773193,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9419000148773193,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6312078833580017},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4753303825855255},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3563954830169678},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2181149125099182}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6312078833580017},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4753303825855255},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3563954830169678},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2181149125099182}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.1386","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1386","pdf_url":"https://aclanthology.org/2025.findings-acl.1386.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.1386","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.1386","pdf_url":"https://aclanthology.org/2025.findings-acl.1386.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412944360.pdf","grobid_xml":"https://content.openalex.org/works/W4412944360.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0],"Mixture":[1],"of":[2,16],"Experts":[3],"(MoE)":[4],"architecture":[5,24],"enables":[6],"efficient":[7,125],"model":[8,29],"scaling":[9],"through":[10,124],"conditional":[11],"computation,":[12],"where":[13],"only":[14],"subset":[15],"parameters":[17],"are":[18],"activated":[19],"per":[20],"input.However,":[21],"this":[22],"distributed":[23],"poses":[25],"unprecedented":[26],"challenges":[27],"for":[28,36,47],"compression,":[30],"as":[31],"conventional":[32],"quantization":[33,45,90,111],"methods":[34],"optimized":[35],"dense":[37],"networks":[38,59],"prove":[39],"inadequate.This":[40],"paper":[41],"introduces":[42],"a":[43,66,83,136],"specialized":[44],"framework":[46,112],"MoE":[48],"architectures,":[49],"motivated":[50],"by":[51],"our":[52,133],"discovery":[53],"that":[54,93,113],"weight":[55],"matrices":[56,76],"across":[57,143],"expert":[58],"exhibit":[60],"distinctive":[61],"channel-wise":[62,110],"outlier":[63],"distributions,":[64],"necessitating":[65],"more":[67],"nuanced":[68],"compression":[69,151],"approach.Through":[70],"theoretical":[71],"analysis":[72],"incorporating":[73],"Fisher":[74],"Information":[75],"and":[77,89],"condition":[78],"number":[79],"characteristics,":[80],"we":[81,106],"establish":[82],"fundamental":[84],"relationship":[85],"between":[86],"layer":[87],"functionality":[88],"sensitivity,":[91],"demonstrating":[92],"downprojection":[94],"layers":[95],"inherently":[96],"demand":[97],"higher":[98],"precision":[99],"compared":[100],"to":[101],"up-projection":[102],"layers.Leveraging":[103],"these":[104],"insights,":[105],"develop":[107],"an":[108],"automated":[109],"dynamically":[114],"determines":[115],"optimal":[116],"bit-width":[117],"allocations":[118],"while":[119,148],"maintaining":[120],"minimal":[121],"computational":[122],"overhead":[123],"statistical":[126],"approximations.When":[127],"evaluated":[128],"on":[129],"the":[130],"Mixtral-8x7b-v0.1":[131],"architecture,":[132],"methodology":[134],"demonstrates":[135],"3.96%":[137],"improvement":[138],"over":[139],"existing":[140],"state-of-the-art":[141],"approaches":[142],"natural":[144],"language":[145],"understanding":[146],"benchmarks,":[147],"achieving":[149],"superior":[150],"ratios.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
