{"id":"https://openalex.org/W4409248709","doi":"https://doi.org/10.1109/hpca61900.2025.00084","title":"BitMoD: Bit-serial Mixture-of-Datatype LLM Acceleration","display_name":"BitMoD: Bit-serial Mixture-of-Datatype LLM Acceleration","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248709","doi":"https://doi.org/10.1109/hpca61900.2025.00084"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00084","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101930621","display_name":"Yuzong Chen","orcid":"https://orcid.org/0000-0001-6387-327X"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuzong Chen","raw_affiliation_strings":["Cornell University,Computer Systems Lab"],"affiliations":[{"raw_affiliation_string":"Cornell University,Computer Systems Lab","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069242854","display_name":"Ahmed F. AbouElhamayed","orcid":"https://orcid.org/0000-0001-6381-2936"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmed F. AbouElhamayed","raw_affiliation_strings":["Cornell University,Computer Systems Lab"],"affiliations":[{"raw_affiliation_string":"Cornell University,Computer Systems Lab","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102582286","display_name":"Xilai Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xilai Dai","raw_affiliation_strings":["Cornell University,Computer Systems Lab"],"affiliations":[{"raw_affiliation_string":"Cornell University,Computer Systems Lab","institution_ids":["https://openalex.org/I205783295"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100714578","display_name":"Yang Wang","orcid":"https://orcid.org/0000-0002-6815-0879"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yang Wang","raw_affiliation_strings":["Systems and Networking Research Group, Microsoft Research"],"affiliations":[{"raw_affiliation_string":"Systems and Networking Research Group, Microsoft Research","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092829415","display_name":"Marta Andronic","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Marta Andronic","raw_affiliation_strings":["Imperial College London,Department of Electrical and Electronic Engineering"],"affiliations":[{"raw_affiliation_string":"Imperial College London,Department of Electrical and Electronic Engineering","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029829952","display_name":"George A. Constantinides","orcid":"https://orcid.org/0000-0002-0201-310X"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"George A. Constantinides","raw_affiliation_strings":["Imperial College London,Department of Electrical and Electronic Engineering"],"affiliations":[{"raw_affiliation_string":"Imperial College London,Department of Electrical and Electronic Engineering","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000815783","display_name":"Mohamed S. Abdelfattah","orcid":"https://orcid.org/0000-0002-6416-9011"},"institutions":[{"id":"https://openalex.org/I205783295","display_name":"Cornell University","ror":"https://ror.org/05bnh6r87","country_code":"US","type":"education","lineage":["https://openalex.org/I205783295"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohamed S. Abdelfattah","raw_affiliation_strings":["Cornell University,Computer Systems Lab"],"affiliations":[{"raw_affiliation_string":"Cornell University,Computer Systems Lab","institution_ids":["https://openalex.org/I205783295"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101930621"],"corresponding_institution_ids":["https://openalex.org/I205783295"],"apc_list":null,"apc_paid":null,"fwci":13.8558,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.98909784,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1082","last_page":"1097"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9745000004768372,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.9745000004768372,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9666000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9509000182151794,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7658936977386475},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.762336254119873},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.594845712184906},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.05957573652267456},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.05354681611061096}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7658936977386475},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.762336254119873},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.594845712184906},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.05957573652267456},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.05354681611061096},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00084","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.6899999976158142,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2026445983","https://openalex.org/W2251939518","https://openalex.org/W2541839172","https://openalex.org/W2725159389","https://openalex.org/W2883920103","https://openalex.org/W2904902077","https://openalex.org/W2946609015","https://openalex.org/W2949870694","https://openalex.org/W2979439447","https://openalex.org/W2998617917","https://openalex.org/W3006586535","https://openalex.org/W3100985894","https://openalex.org/W3187908937","https://openalex.org/W3205209748","https://openalex.org/W3213241618","https://openalex.org/W4247198796","https://openalex.org/W4281660701","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4377864779","https://openalex.org/W4389476299","https://openalex.org/W4393145114","https://openalex.org/W4393406875","https://openalex.org/W4393407021","https://openalex.org/W4402670692","https://openalex.org/W4404954664","https://openalex.org/W4406650295"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4327546585","https://openalex.org/W2411923897","https://openalex.org/W4394546135","https://openalex.org/W4285347720","https://openalex.org/W4200259850","https://openalex.org/W2333831899","https://openalex.org/W2484894494"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4],"demonstrated":[5],"remarkable":[6],"performance":[7,225],"across":[8],"various":[9],"machine":[10],"learning":[11],"tasks.":[12],"Yet":[13],"the":[14,29,51,76,106,145,157,222],"substantial":[15],"memory":[16],"footprint":[17],"of":[18,31,71,79,235],"LLMs":[19,32,170],"significantly":[20,174],"hinders":[21],"their":[22],"deployment.":[23],"In":[24],"this":[25],"paper,":[26],"we":[27],"improve":[28],"accessibility":[30],"through":[33],"BitMoD<sup":[34],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[35,251],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>,":[36],"an":[37,227,233],"algorithm-hardware":[38],"co-design":[39],"solution":[40],"that":[41,60,172],"enables":[42],"efficient":[43,228],"LLM":[44,89,177,188,208,218,245],"acceleration":[45,180],"at":[46],"low":[47,93],"weight":[48,140],"precision.":[49],"On":[50,105],"algorithm":[52],"side,":[53,108],"BitMoD":[54,84,109,173,185,203,231],"introduces":[55],"fine-grained":[56],"data":[57,65,82,122,141],"type":[58,66],"adaptation":[59],"uses":[61],"a":[62,69,111,134,151],"different":[63,139],"numerical":[64,119],"to":[67,87,91,115,137,155,190,206,210,243],"quantize":[68,88,187,207],"group":[70],"(e.g.,":[72,95],"128)":[73],"weights.":[74],"Through":[75],"careful":[77],"design":[78,126],"these":[80],"new":[81],"types,":[83,142],"is":[85,204,253],"able":[86,205],"weights":[90,189,209],"very":[92],"precision":[94],"4":[96,191],"bits":[97],"and":[98,121,179,238,248],"3":[99],"bits)":[100],"while":[101,212],"maintaining":[102],"high":[103],"accuracy.":[104],"hardware":[107,125,146,163],"employs":[110,133],"bitserial":[112],"processing":[113],"element":[114],"easily":[116],"support":[117],"multiple":[118],"precisions":[120],"types;":[123],"our":[124],"includes":[127],"two":[128],"key":[129],"innovations:":[130],"First,":[131],"it":[132,149],"unified":[135],"representation":[136],"process":[138],"thus":[143],"reducing":[144],"cost.":[147],"Second,":[148],"adopts":[150],"bit-serial":[152],"dequantization":[153],"unit":[154],"rescale":[156],"per-group":[158],"partial":[159],"sum":[160],"with":[161,193,226],"minimal":[162],"overhead.":[164],"Our":[165],"evaluation":[166],"on":[167,198],"six":[168],"representative":[169],"demonstrates":[171],"outperforms":[175],"state-of-the-art":[176],"quantization":[178,219],"methods.":[181],"For":[182,200],"discriminative":[183],"tasks,":[184,202],"can":[186],"-bit":[192],"<":[194],"0.5%":[195],"accuracy":[196],"loss":[197],"average.":[199],"generative":[201],"3-bit":[211],"achieving":[213],"better":[214],"perplexity":[215],"than":[216],"prior":[217,244],"scheme.":[220],"Combining":[221],"superior":[223],"model":[224],"accelerator":[229],"design,":[230],"achieves":[232],"average":[234],"$1.69":[236],"\\times$":[237,240],"$1.48":[239],"speedups":[241],"compared":[242],"accelerators":[246],"ANT":[247],"OliVe,":[249],"respectively.<sup":[250],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>Code":[252],"available":[254],"at:":[255],"https://github.com/yc2367/BitMoD-HPCA-25":[256]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
