{"id":"https://openalex.org/W4410583010","doi":"https://doi.org/10.23919/date64628.2025.10993106","title":"OPS: Outlier-Aware Precision-Slice Framework for LLM Acceleration","display_name":"OPS: Outlier-Aware Precision-Slice Framework for LLM Acceleration","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4410583010","doi":"https://doi.org/10.23919/date64628.2025.10993106"},"language":"en","primary_location":{"id":"doi:10.23919/date64628.2025.10993106","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10993106","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017670541","display_name":"Fangxin Liu","orcid":"https://orcid.org/0000-0002-8769-293X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fangxin Liu","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030493839","display_name":"Ning Yang","orcid":"https://orcid.org/0009-0004-6964-8910"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Yang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107269495","display_name":"Zongwu Wang","orcid":"https://orcid.org/0009-0003-2157-4927"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongwu Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062059596","display_name":"Xuanpeng Zhu","orcid":"https://orcid.org/0009-0000-4859-9604"},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanpeng Zhu","raw_affiliation_strings":["ZTE Corporation,China"],"affiliations":[{"raw_affiliation_string":"ZTE Corporation,China","institution_ids":["https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066348053","display_name":"Haidong Yao","orcid":"https://orcid.org/0009-0000-6911-9796"},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haidong Yao","raw_affiliation_strings":["ZTE Corporation,China"],"affiliations":[{"raw_affiliation_string":"ZTE Corporation,China","institution_ids":["https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112620081","display_name":"Xiankui Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiankui Xiong","raw_affiliation_strings":["ZTE Corporation,China"],"affiliations":[{"raw_affiliation_string":"ZTE Corporation,China","institution_ids":["https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071154794","display_name":"Qi Sun","orcid":"https://orcid.org/0000-0002-5434-1324"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Sun","raw_affiliation_strings":["Zhejiang University"],"affiliations":[{"raw_affiliation_string":"Zhejiang University","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108050285","display_name":"Jiang Li","orcid":"https://orcid.org/0009-0008-0549-7438"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Jiang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5017670541"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1495905,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10346","display_name":"Magnetic confinement fusion research","score":0.9240999817848206,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9211999773979187,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7840086817741394},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6833905577659607},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.5194627642631531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25350672006607056},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11241957545280457}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7840086817741394},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6833905577659607},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.5194627642631531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25350672006607056},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11241957545280457},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/date64628.2025.10993106","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date64628.2025.10993106","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G5952030521","display_name":null,"funder_award_id":"62402311","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8995253305","display_name":null,"funder_award_id":"24ZR1433700","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":4,"referenced_works":["https://openalex.org/W3204021316","https://openalex.org/W4393406920","https://openalex.org/W4404134117","https://openalex.org/W6847478871"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W3006513224","https://openalex.org/W2376932109","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"have":[4],"transformed":[5],"numerous":[6],"AI":[7],"applications,":[8],"with":[9,76,168,193],"on-device":[10,203],"deployment":[11,35],"becoming":[12],"increasingly":[13],"important":[14,152],"for":[15,66,151,158],"reducing":[16],"cloud":[17],"computing":[18],"costs":[19],"and":[20,29,85,154,210],"protecting":[21],"user":[22],"privacy.":[23],"However,":[24],"the":[25,48,106,128,134,137],"astronomical":[26],"model":[27,74,195,211],"size":[28],"limited":[30],"hardware":[31,88,170],"resources":[32],"pose":[33],"significant":[34,160],"challenges.":[36],"Model":[37],"quantization":[38],"is":[39],"a":[40,116,179,187],"promising":[41],"approach":[42,199],"to":[43,72,80,132],"mitigate":[44],"this":[45,60],"gap,":[46],"but":[47],"presence":[49],"of":[50,109,136],"outliers":[51],"in":[52,105],"LLMs":[53,110],"reduces":[54],"its":[55],"effectiveness.":[56],"Previous":[57],"efforts":[58],"addressed":[59],"issue":[61],"by":[62,111,147],"employing":[63],"compression-based":[64],"encoding":[65],"mixed-precision":[67],"quantization.":[68],"These":[69],"approaches":[70],"struggle":[71],"balance":[73],"accuracy":[75,196],"hard-ware":[77],"efficiency":[78,209],"due":[79],"their":[81],"value-wise":[82],"outlier":[83],"granularity":[84],"complex":[86,176],"encoding/decoding":[87],"logic.":[89],"To":[90],"address":[91],"this,":[92],"we":[93],"propose":[94],"OPS":[95,120,181],"(Outlier-aware":[96],"Precision-Slicing),":[97],"an":[98,122],"acceleration":[99],"framework":[100],"that":[101,126],"exploits":[102],"massive":[103],"sparsity":[104],"higher-order":[107],"part":[108],"splitting":[112],"16-bit":[113],"values":[114,157],"into":[115],"4-bit/12-bit":[117],"format.":[118],"Crucially,":[119],"introduces":[121],"early":[123],"bird":[124],"mechanism":[125,142],"leverages":[127],"high-order":[129],"4-bit":[130],"computation":[131],"predict":[133],"importance":[135],"full":[138],"calculation":[139],"result.":[140],"This":[141,162,198],"enables":[143,200],"efficient":[144,202],"computational":[145,208],"skips":[146],"continuing":[148],"execution":[149],"only":[150],"computations":[153],"using":[155],"preset":[156],"less":[159],"ones.":[161],"scheme":[163],"can":[164],"be":[165],"efficiently":[166],"integrated":[167],"existing":[169],"accelerators":[171],"like":[172],"systolic":[173],"arrays":[174],"without":[175],"encoding/decoding.":[177],"As":[178],"result,":[180],"outperforms":[182],"state-of-the-art":[183],"outlier-aware":[184],"accelerators,":[185],"achieving":[186],"1.3":[188],"\u2212":[189],"4.3\u00d7":[190],"performance":[191],"boost":[192],"minimal":[194],"loss.":[197],"more":[201],"LLM":[204],"deployment,":[205],"effectively":[206],"balancing":[207],"accuracy.":[212]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
