{"id":"https://openalex.org/W7138080569","doi":"https://doi.org/10.1609/aaai.v40i27.39406","title":"HALO: Hardware-Aware Quantization with Low Critical-Path-Delay Weights for LLM Acceleration","display_name":"HALO: Hardware-Aware Quantization with Low Critical-Path-Delay Weights for LLM Acceleration","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138080569","doi":"https://doi.org/10.1609/aaai.v40i27.39406"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i27.39406","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39406","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39406/43367","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39406/43367","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066607195","display_name":"Rohan Juneja","orcid":"https://orcid.org/0000-0002-6015-1084"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Rohan Juneja","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101579667","display_name":"Shivam Aggarwal","orcid":"https://orcid.org/0000-0003-1748-9810"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shivam Aggarwal","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125760841","display_name":"Safeen Huda","orcid":null},"institutions":[{"id":"https://openalex.org/I4210161460","display_name":"OpenAI (United States)","ror":"https://ror.org/05wx9n238","country_code":"US","type":"company","lineage":["https://openalex.org/I4210161460"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Safeen Huda","raw_affiliation_strings":["OpenAI"],"affiliations":[{"raw_affiliation_string":"OpenAI","institution_ids":["https://openalex.org/I4210161460"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129681620","display_name":"Tulika Mitra","orcid":null},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tulika Mitra","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057413185","display_name":"Li-Shiuan Peh","orcid":"https://orcid.org/0000-0001-9010-6519"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Li-Shiuan Peh","raw_affiliation_strings":["National University of Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5066607195"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.3858209,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"27","first_page":"22472","last_page":"22481"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3100999891757965,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.3100999891757965,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.23080000281333923,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.11259999871253967,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.7473000288009644},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.650600016117096},{"id":"https://openalex.org/keywords/halo","display_name":"Halo","score":0.6273000240325928},{"id":"https://openalex.org/keywords/frequency-scaling","display_name":"Frequency scaling","score":0.608299970626831},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.6014000177383423},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4099000096321106},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.3621000051498413}],"concepts":[{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.7473000288009644},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6654999852180481},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.650600016117096},{"id":"https://openalex.org/C184665706","wikidata":"https://www.wikidata.org/wiki/Q186310","display_name":"Halo","level":3,"score":0.6273000240325928},{"id":"https://openalex.org/C157742956","wikidata":"https://www.wikidata.org/wiki/Q3237776","display_name":"Frequency scaling","level":3,"score":0.608299970626831},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.6014000177383423},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4099000096321106},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3921000063419342},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.3686999976634979},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.3621000051498413},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.34049999713897705},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.2732999920845032}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i27.39406","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39406","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39406/43367","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i27.39406","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i27.39406","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39406/43367","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.9119400978088379,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138080569.pdf","grobid_xml":"https://content.openalex.org/works/W7138080569.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Quantization":[0,77],"is":[1],"critical":[2],"for":[3,23,74],"efficiently":[4],"deploying":[5],"large":[6],"language":[7],"models":[8],"(LLMs).":[9],"Yet":[10],"conventional":[11],"methods":[12],"remain":[13],"hardware-agnostic,":[14],"limited":[15],"to":[16,47],"bit-width":[17],"constraints,":[18],"and":[19,32,52,91,109,129,136,163,181],"do":[20],"not":[21],"account":[22],"intrinsic":[24],"circuit":[25],"characteristics":[26],"such":[27,157],"as":[28,158],"the":[29,45,56,115,146],"timing":[30,50,90],"behaviors":[31],"energy":[33,152,182],"profiles":[34],"of":[35,59,179,184],"Multiply-Accumulate":[36],"(MAC)":[37],"units.":[38],"This":[39],"disconnect":[40],"from":[41],"circuit-level":[42],"behavior":[43],"limits":[44],"ability":[46],"exploit":[48],"available":[49],"margins":[51],"energy-saving":[53],"opportunities,":[54],"reducing":[55,142],"overall":[57],"efficiency":[58],"deployment":[60],"on":[61,155,194],"modern":[62],"accelerators.":[63],"To":[64],"address":[65],"these":[66,121],"limitations,":[67],"we":[68],"propose":[69],"HALO,":[70],"a":[71,125],"versatile":[72],"framework":[73],"Hardware-Aware":[75],"Post-Training":[76],"(PTQ).":[78],"Unlike":[79],"traditional":[80],"methods,":[81,189],"HALO":[82,98,119,149,170],"explicitly":[83],"incorporates":[84],"detailed":[85],"hardware":[86],"characteristics,":[87],"including":[88],"critical-path":[89],"power":[92],"consumption,":[93],"into":[94],"its":[95],"quantization":[96,188],"approach.":[97],"strategically":[99],"selects":[100],"weights":[101],"with":[102,123,191],"low":[103],"critical-path-delays":[104],"enabling":[105],"higher":[106],"operational":[107],"frequencies":[108],"dynamic":[110,127],"frequency":[111,130],"scaling":[112,131],"without":[113],"disrupting":[114],"architecture's":[116],"dataflow.":[117],"Remarkably,":[118],"achieves":[120],"improvements":[122,178],"only":[124],"few":[126],"voltage":[128],"(DVFS)":[132],"adjustments,":[133],"ensuring":[134],"simplicity":[135],"practicality":[137],"in":[138],"deployment.":[139],"Additionally,":[140],"by":[141],"switching":[143],"activity":[144],"within":[145],"MAC":[147],"units,":[148],"effectively":[150],"lowers":[151],"consumption.":[153],"Evaluations":[154],"accelerators":[156],"Tensor":[159],"Processing":[160,165],"Units":[161,166],"(TPUs)":[162],"Graphics":[164],"(GPUs)":[167],"demonstrate":[168],"that":[169],"significantly":[171],"enhances":[172],"inference":[173],"efficiency,":[174],"achieving":[175],"average":[176],"performance":[177],"270%":[180],"savings":[183],"51%":[185],"over":[186],"baseline":[187],"all":[190],"minimal":[192],"impact":[193],"accuracy.":[195]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
