{"id":"https://openalex.org/W4414197919","doi":"https://doi.org/10.1109/dac63849.2025.11132980","title":"PISA: Efficient Precision-Slice Framework for LLMs with Adaptive Numerical Type","display_name":"PISA: Efficient Precision-Slice Framework for LLMs with Adaptive Numerical Type","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197919","doi":"https://doi.org/10.1109/dac63849.2025.11132980"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132980","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132980","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101397543","display_name":"Ning Yang","orcid":"https://orcid.org/0000-0002-1938-6287"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ning Yang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107269495","display_name":"Zongwu Wang","orcid":"https://orcid.org/0009-0003-2157-4927"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongwu Wang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060999547","display_name":"Qingxiao Sun","orcid":"https://orcid.org/0000-0003-2927-362X"},"institutions":[{"id":"https://openalex.org/I204553293","display_name":"China University of Petroleum, Beijing","ror":"https://ror.org/041qf4r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I204553293"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingxiao Sun","raw_affiliation_strings":["China University of Petroleum-Beijing"],"affiliations":[{"raw_affiliation_string":"China University of Petroleum-Beijing","institution_ids":["https://openalex.org/I204553293"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072265593","display_name":"Liqiang Lu","orcid":"https://orcid.org/0000-0002-3801-6847"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqiang Lu","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017670541","display_name":"Fangxin Liu","orcid":"https://orcid.org/0000-0002-8769-293X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangxin Liu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101397543"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35880762,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.8296999931335449,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.8296999931335449,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.7505999803543091,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.7430999875068665,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6599000096321106},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6348999738693237},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6315000057220459},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.5669999718666077},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5598000288009644},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5526000261306763},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5012999773025513},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.43630000948905945}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7170000076293945},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6599000096321106},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6348999738693237},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6315000057220459},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.5669999718666077},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5598000288009644},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5526000261306763},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5012999773025513},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.45010000467300415},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.43630000948905945},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.38199999928474426},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.35499998927116394},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.3492000102996826},{"id":"https://openalex.org/C66024118","wikidata":"https://www.wikidata.org/wiki/Q1122506","display_name":"Computational model","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3215000033378601},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2831999957561493},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C11644782","wikidata":"https://www.wikidata.org/wiki/Q15401790","display_name":"Cost efficiency","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.2535000145435333}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132980","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132980","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2322516248","https://openalex.org/W2725159389","https://openalex.org/W2883920103","https://openalex.org/W2923014074","https://openalex.org/W2997375145","https://openalex.org/W3092209569","https://openalex.org/W3100985894","https://openalex.org/W3139203094","https://openalex.org/W3204021316","https://openalex.org/W4293023369","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4393406920","https://openalex.org/W4404057219","https://openalex.org/W4404134117"],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4],"transformed":[5],"numerous":[6],"AI":[7],"applications,":[8],"with":[9,76,168,198],"on-device":[10],"deployment":[11,35],"becoming":[12],"increasingly":[13],"important":[14,152],"for":[15,66,151,158],"reducing":[16],"cloud":[17],"computing":[18],"costs":[19],"and":[20,29,85,154,192,215],"protecting":[21],"user":[22],"privacy.":[23],"However,":[24],"the":[25,48,106,128,134,137],"astronomical":[26],"model":[27,74,200,216],"size":[28],"limited":[30],"hardware":[31,77,88,170],"resources":[32],"pose":[33],"significant":[34,160],"challenges.":[36],"Model":[37],"quantization":[38],"is":[39],"a":[40,116,179,187],"promising":[41],"approach":[42,204],"to":[43,72,80,132],"mitigate":[44],"this":[45,60],"gap,":[46],"but":[47],"presence":[49],"of":[50,109,136],"outliers":[51],"in":[52,105],"LLMs":[53,110],"reduces":[54],"its":[55],"effectiveness.":[56],"Previous":[57],"efforts":[58],"addressed":[59],"issue":[61],"by":[62,111,147],"employing":[63],"compression-based":[64],"encoding":[65],"mixed-precision":[67],"quantization.":[68],"These":[69],"approaches":[70],"struggle":[71],"balance":[73],"accuracy":[75,201],"efficiency":[78,214],"due":[79],"their":[81],"value-wise":[82],"outlier":[83],"granularity":[84],"complex":[86,176],"encoding/decoding":[87],"logic.":[89],"To":[90],"address":[91],"this,":[92],"we":[93],"propose":[94],"PISA":[95,120,181],"(Precision-Slice":[96],"Framework),":[97],"an":[98,122],"acceleration":[99],"framework":[100],"that":[101,126],"exploits":[102],"massive":[103],"sparsity":[104],"higher-order":[107],"part":[108],"splitting":[112],"16-bit":[113],"values":[114,157],"into":[115],"4-bit/12-bit":[117],"format.":[118],"Crucially,":[119],"introduces":[121],"early":[123],"bird":[124],"mechanism":[125,142],"leverages":[127],"high-order":[129],"4-bit":[130],"computation":[131],"predict":[133],"importance":[135],"full":[138],"calculation":[139],"result.":[140],"This":[141,162,203],"enables":[143,205],"efficient":[144,207],"computational":[145,213],"skips":[146],"continuing":[148],"execution":[149],"only":[150],"computations":[153],"using":[155],"preset":[156],"less":[159],"ones.":[161],"scheme":[163],"can":[164],"be":[165],"efficiently":[166],"integrated":[167],"existing":[169],"accelerators":[171],"like":[172],"systolic":[173],"arrays":[174],"without":[175],"encoding/decoding.":[177],"As":[178],"result,":[180],"outperforms":[182],"state-of-the-art":[183],"precision-aware":[184],"accelerators,":[185],"achieving":[186],"$1.3-4.3":[188],"\\times$":[189],"performance":[190],"boost":[191],"$14.3-66.7":[193],"\\%$":[194],"greater":[195],"energy":[196],"efficiency,":[197],"minimal":[199],"loss.":[202],"more":[206],"ondevice":[208],"LLM":[209],"deployment,":[210],"effectively":[211],"balancing":[212],"accuracy.":[217]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
