{"id":"https://openalex.org/W4414197479","doi":"https://doi.org/10.1109/dac63849.2025.11132989","title":"Finding the Pareto Frontier of Low-Precision Data Formats and MAC Architecture for LLM Inference","display_name":"Finding the Pareto Frontier of Low-Precision Data Formats and MAC Architecture for LLM Inference","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197479","doi":"https://doi.org/10.1109/dac63849.2025.11132989"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132989","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132989","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089938000","display_name":"Brian Crafton","orcid":"https://orcid.org/0000-0002-0227-0421"},"institutions":[{"id":"https://openalex.org/I1334877674","display_name":"Taiwan Semiconductor Manufacturing Company (United States)","ror":"https://ror.org/02rvfjx92","country_code":"US","type":"company","lineage":["https://openalex.org/I1334877674","https://openalex.org/I4210120917"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Brian Crafton","raw_affiliation_strings":["TSMC Corporate Research,San Jose,USA"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,San Jose,USA","institution_ids":["https://openalex.org/I1334877674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076031530","display_name":"Xiaochen Peng","orcid":"https://orcid.org/0000-0001-6148-7711"},"institutions":[{"id":"https://openalex.org/I1334877674","display_name":"Taiwan Semiconductor Manufacturing Company (United States)","ror":"https://ror.org/02rvfjx92","country_code":"US","type":"company","lineage":["https://openalex.org/I1334877674","https://openalex.org/I4210120917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaochen Peng","raw_affiliation_strings":["TSMC Corporate Research,San Jose,USA"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,San Jose,USA","institution_ids":["https://openalex.org/I1334877674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062894678","display_name":"Xiaoyu Sun","orcid":"https://orcid.org/0000-0001-5337-5680"},"institutions":[{"id":"https://openalex.org/I1334877674","display_name":"Taiwan Semiconductor Manufacturing Company (United States)","ror":"https://ror.org/02rvfjx92","country_code":"US","type":"company","lineage":["https://openalex.org/I1334877674","https://openalex.org/I4210120917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoyu Sun","raw_affiliation_strings":["TSMC Corporate Research,San Jose,USA"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,San Jose,USA","institution_ids":["https://openalex.org/I1334877674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040695095","display_name":"Ashwin Sanjay Lele","orcid":null},"institutions":[{"id":"https://openalex.org/I1334877674","display_name":"Taiwan Semiconductor Manufacturing Company (United States)","ror":"https://ror.org/02rvfjx92","country_code":"US","type":"company","lineage":["https://openalex.org/I1334877674","https://openalex.org/I4210120917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashwin Lele","raw_affiliation_strings":["TSMC Corporate Research,San Jose,USA"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,San Jose,USA","institution_ids":["https://openalex.org/I1334877674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100335367","display_name":"Bo Zhang","orcid":"https://orcid.org/0000-0003-1574-198X"},"institutions":[{"id":"https://openalex.org/I1334877674","display_name":"Taiwan Semiconductor Manufacturing Company (United States)","ror":"https://ror.org/02rvfjx92","country_code":"US","type":"company","lineage":["https://openalex.org/I1334877674","https://openalex.org/I4210120917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bo Zhang","raw_affiliation_strings":["TSMC Corporate Research,San Jose,USA"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,San Jose,USA","institution_ids":["https://openalex.org/I1334877674"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083012416","display_name":"Win-San Khwa","orcid":"https://orcid.org/0000-0002-6283-3564"},"institutions":[{"id":"https://openalex.org/I4210120917","display_name":"Taiwan Semiconductor Manufacturing Company (Taiwan)","ror":"https://ror.org/02wx79d08","country_code":"TW","type":"company","lineage":["https://openalex.org/I4210120917"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Win-San Khwa","raw_affiliation_strings":["TSMC Corporate Research,Hsinchu,Taiwan"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,Hsinchu,Taiwan","institution_ids":["https://openalex.org/I4210120917"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067464561","display_name":"Kerem Akarvardar","orcid":"https://orcid.org/0000-0001-5957-826X"},"institutions":[{"id":"https://openalex.org/I1334877674","display_name":"Taiwan Semiconductor Manufacturing Company (United States)","ror":"https://ror.org/02rvfjx92","country_code":"US","type":"company","lineage":["https://openalex.org/I1334877674","https://openalex.org/I4210120917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kerem Akarvardar","raw_affiliation_strings":["TSMC Corporate Research,San Jose,USA"],"affiliations":[{"raw_affiliation_string":"TSMC Corporate Research,San Jose,USA","institution_ids":["https://openalex.org/I1334877674"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5089938000"],"corresponding_institution_ids":["https://openalex.org/I1334877674"],"apc_list":null,"apc_paid":null,"fwci":2.0986,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8938295,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10346","display_name":"Magnetic confinement fusion research","score":0.7547000050544739,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10346","display_name":"Magnetic confinement fusion research","score":0.7547000050544739,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.7401999831199646,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.7087000012397766,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5551999807357788},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.47029998898506165},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.4674000144004822},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4449999928474426},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4447000026702881},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4284000098705292},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.38269999623298645},{"id":"https://openalex.org/keywords/logarithm","display_name":"Logarithm","score":0.382099986076355},{"id":"https://openalex.org/keywords/data-point","display_name":"Data point","score":0.35830000042915344}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.705299973487854},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5551999807357788},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.47029998898506165},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.4674000144004822},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4449999928474426},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4447000026702881},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4284000098705292},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.39959999918937683},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.38269999623298645},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.382099986076355},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.35830000042915344},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.35659998655319214},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.3472000062465668},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.3449999988079071},{"id":"https://openalex.org/C106516650","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm design","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30730000138282776},{"id":"https://openalex.org/C120823896","wikidata":"https://www.wikidata.org/wiki/Q1043226","display_name":"Product design","level":3,"score":0.3070000112056732},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29980000853538513},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28929999470710754},{"id":"https://openalex.org/C138852830","wikidata":"https://www.wikidata.org/wiki/Q2292993","display_name":"Design methods","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C31352089","wikidata":"https://www.wikidata.org/wiki/Q3750474","display_name":"Systems design","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C32900221","wikidata":"https://www.wikidata.org/wiki/Q181365","display_name":"Dot product","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.25450000166893005},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132989","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132989","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1998747003","https://openalex.org/W2153039279","https://openalex.org/W2313356641","https://openalex.org/W2606722458","https://openalex.org/W2739351760","https://openalex.org/W2743322459","https://openalex.org/W2751699127","https://openalex.org/W2946609015","https://openalex.org/W2981323010","https://openalex.org/W2981981507","https://openalex.org/W3047358464","https://openalex.org/W3047486355","https://openalex.org/W3092209569","https://openalex.org/W4200429237","https://openalex.org/W4296914491","https://openalex.org/W4313377427","https://openalex.org/W4319996342","https://openalex.org/W4380874652","https://openalex.org/W4385187428","https://openalex.org/W4400315128","https://openalex.org/W4402475781","https://openalex.org/W4404133682"],"related_works":[],"abstract_inverted_index":{"To":[0],"accelerate":[1],"AI":[2],"applications,":[3],"numerous":[4],"data":[5,38,48,73,103],"formats":[6,39,104],"and":[7,33,40,43,55,75,86,113,122],"physical":[8],"implementations":[9],"of":[10,28,63],"matrix":[11],"multiplication":[12],"have":[13],"been":[14],"proposed,":[15],"creating":[16],"a":[17],"complex":[18],"design":[19,78],"space.":[20],"This":[21],"paper":[22],"studies":[23],"the":[24,29,52,82,93],"efficient":[25],"MAC":[26,69,99],"implementation":[27],"integer,":[30],"floating-point,":[31],"posit,":[32],"logarithmic":[34],"number":[35],"system":[36],"(LNS)":[37],"Microscaling":[41],"(MX)":[42],"VectorScaled":[44],"Quantization":[45],"(VSQ)":[46],"block":[47],"formats.":[49],"We":[50,89],"evaluate":[51],"area,":[53],"power,":[54],"numerical":[56,95],"accuracy":[57],"(evaluated":[58],"as":[59,81],"signal-to-quantization":[60],"noise":[61],"ratio)":[62],"$\\mathbf{3":[64],"5,":[65],"0":[66,67],"0}$":[68],"designs":[70,100],"spanning":[71],"each":[72],"format":[74],"several":[76],"key":[77],"parameters":[79],"such":[80],"inner":[83],"product":[84,125],"size":[85],"accumulation":[87],"width.":[88],"find":[90],"that":[91],"for":[92],"same":[94],"accuracy,":[96],"pareto":[97],"optimal":[98],"with":[101],"emerging":[102],"(LNS16,":[105],"MXINT8,":[106],"VSQINT4)":[107],"achieve":[108],"$1.8":[109],"\\times":[110],"2.2":[111],"\\times$,":[112],"$1.9":[114],"\\times$":[115],"TOPs/W":[116],"improvement":[117],"compared":[118],"to":[119],"FP16,":[120],"FP8,":[121],"FP4":[123],"dot":[124],"implementations.":[126]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
