{"id":"https://openalex.org/W7127905731","doi":"https://doi.org/10.1145/3779212.3790189","title":"Mugi: Value Level Parallelism For Efficient LLMs","display_name":"Mugi: Value Level Parallelism For Efficient LLMs","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W7127905731","doi":"https://doi.org/10.1145/3779212.3790189"},"language":null,"primary_location":{"id":"doi:10.1145/3779212.3790189","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790189","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3779212.3790189","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125114080","display_name":"Daniel J. Price","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Daniel Price","raw_affiliation_strings":["Department of ECE, University of Central Florida, Orlando, Florida, USA"],"raw_orcid":"https://orcid.org/0009-0007-0571-7413","affiliations":[{"raw_affiliation_string":"Department of ECE, University of Central Florida, Orlando, Florida, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021629244","display_name":"Prabhu Vellaisamy","orcid":"https://orcid.org/0009-0007-7750-8725"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prabhu Vellaisamy","raw_affiliation_strings":["Department of ECE, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"],"raw_orcid":"https://orcid.org/0009-0007-7750-8725","affiliations":[{"raw_affiliation_string":"Department of ECE, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125208395","display_name":"John P. Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"John Paul Shen","raw_affiliation_strings":["Department of ECE, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"],"raw_orcid":"https://orcid.org/0000-0002-7225-0629","affiliations":[{"raw_affiliation_string":"Department of ECE, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5125256703","display_name":"Di Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di Wu","raw_affiliation_strings":["Department of ECE, University of Central Florida, Orlando, Florida, USA"],"raw_orcid":"https://orcid.org/0000-0001-9775-8026","affiliations":[{"raw_affiliation_string":"Department of ECE, University of Central Florida, Orlando, Florida, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5125114080"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18116123,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1216","last_page":"1234"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.35740000009536743,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.35740000009536743,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.14509999752044678,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.599399983882904},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.4973999857902527},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4343000054359436},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.3970000147819519},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.38690000772476196},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.3752000033855438}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6711999773979187},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.599399983882904},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.4973999857902527},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4343000054359436},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3970000147819519},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.38690000772476196},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3752000033855438},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.37130001187324524},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36730000376701355},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3662000000476837},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2766999900341034},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2694999873638153},{"id":"https://openalex.org/C2984755018","wikidata":"https://www.wikidata.org/wiki/Q17118374","display_name":"Nonlinear model","level":3,"score":0.2685000002384186},{"id":"https://openalex.org/C109332788","wikidata":"https://www.wikidata.org/wiki/Q615445","display_name":"Economic efficiency","level":2,"score":0.2612000107765198},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.2524000108242035}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3779212.3790189","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790189","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2601.10823","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2601.10823","pdf_url":"https://arxiv.org/pdf/2601.10823","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3779212.3790189","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790189","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.595548689365387,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Value":[0],"level":[1],"parallelism":[2],"(VLP)":[3],"has":[4],"been":[5],"proposed":[6],"to":[7,115,148],"improve":[8],"the":[9,117],"efficiency":[10,129],"of":[11],"large-batch,":[12],"low-precision":[13],"general":[14],"matrix":[15],"multiply":[16],"(GEMM)":[17],"between":[18],"symmetric":[19],"activations":[20],"and":[21,63,103,120,130,144,150,156,158,162,172],"weights.":[22],"In":[23,38],"transformer":[24],"based":[25],"large":[26],"language":[27],"models":[28],"(LLMs),":[29],"there":[30],"exist":[31],"more":[32],"sophisticated":[33],"operations":[34],"beyond":[35],"activation-weight":[36],"GEMM.":[37],"this":[39],"paper,":[40],"we":[41,48,81,108],"explore":[42],"how":[43],"VLP":[44,50,66,83,112],"benefits":[45],"LLMs.":[46],"First,":[47],"generalize":[49],"for":[51,84,152,160,167],"nonlinear":[52,56,153],"approximations,":[53],"outperforming":[54],"existing":[55],"approximations":[57],"in":[58],"end-to-end":[59],"LLM":[60,94,123,168],"accuracy,":[61],"performance,":[62,128],"efficiency.":[64],"Our":[65,132],"approximation":[67],"follows":[68],"a":[69,110],"value-centric":[70],"approach,":[71],"where":[72],"important":[73],"values":[74],"are":[75],"assigned":[76],"with":[77,87],"greater":[78],"accuracy.":[79],"Second,":[80],"optimize":[82],"small-batch":[85],"GEMMs":[86],"asymmetric":[88],"inputs":[89],"efficiently,":[90],"which":[91],"leverages":[92],"timely":[93],"optimizations,":[95],"including":[96],"weight-only":[97],"quantization,":[98,102],"key-value":[99],"(KV)":[100],"cache":[101],"group":[104],"query":[105],"attention.":[106],"Finally,":[107],"design":[109],"new":[111],"architecture,":[113],"Mugi,":[114],"encapsulate":[116],"innovations":[118],"above":[119],"support":[121],"full":[122],"workloads,":[124],"while":[125],"providing":[126],"better":[127],"sustainability.":[131],"experimental":[133],"results":[134],"show":[135],"that":[136],"Mugi":[137],"can":[138],"offer":[139],"significant":[140],"improvements":[141],"on":[142],"throughput":[143],"energy":[145],"efficiency,":[146],"up":[147],"$45\\times$":[149],"$668\\times$":[151],"softmax":[154],"operations,":[155],"$2.07\\times$":[157],"$3.11\\times$":[159],"LLMs,":[161],"also":[163],"decrease":[164],"operational":[165],"carbon":[166,174],"operation":[169],"by":[170,175],"$1.45\\times$":[171],"embodied":[173],"$1.48\\times$.":[176]},"counts_by_year":[],"updated_date":"2026-02-07T23:14:19.703344","created_date":"2026-02-07T00:00:00"}
