{"id":"https://openalex.org/W4411867114","doi":"https://doi.org/10.1109/tcad.2025.3585023","title":"Oiso: Outlier-Isolated Data Format for Low-Bit Large Language Model Quantization","display_name":"Oiso: Outlier-Isolated Data Format for Low-Bit Large Language Model Quantization","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W4411867114","doi":"https://doi.org/10.1109/tcad.2025.3585023"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2025.3585023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2025.3585023","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092876716","display_name":"Lancheng Zou","orcid":"https://orcid.org/0009-0004-6820-7064"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Lancheng Zou","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0004-6820-7064","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101319284","display_name":"Shuo Yin","orcid":"https://orcid.org/0000-0003-4927-0194"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Shuo Yin","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-4927-0194","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086203449","display_name":"Mingjun Li","orcid":"https://orcid.org/0009-0007-6740-8413"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Mingjun Li","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0007-6740-8413","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111357277","display_name":"Mingzi Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Mingzi Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100662676","display_name":"Chen Bai","orcid":"https://orcid.org/0000-0002-1742-0090"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]},{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chen Bai","raw_affiliation_strings":["Department of Electronic and Computer Engineering, The Hong Kong University of Science and Technology, Sai Kung, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronic and Computer Engineering, The Hong Kong University of Science and Technology, Sai Kung, Hong Kong","institution_ids":["https://openalex.org/I200769079"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013503446","display_name":"Wenqian Zhao","orcid":"https://orcid.org/0000-0001-9501-9254"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Wenqian Zhao","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-9501-9254","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051340429","display_name":"Bei Yu","orcid":"https://orcid.org/0000-0001-6406-4810"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Bei Yu","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-6406-4810","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong, Shatin, SAR, Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5092876716"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07316642,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"45","issue":"2","first_page":"929","last_page":"942"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9847999811172485,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.706462025642395},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.6359215974807739},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.588117778301239},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5483095645904541},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4131513237953186},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3952711820602417},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2772340476512909},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.08251571655273438}],"concepts":[{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.706462025642395},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.6359215974807739},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.588117778301239},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5483095645904541},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4131513237953186},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3952711820602417},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2772340476512909},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08251571655273438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2025.3585023","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2025.3585023","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1983394510","https://openalex.org/W2048266589","https://openalex.org/W2170142399","https://openalex.org/W2289252105","https://openalex.org/W2346205343","https://openalex.org/W2606722458","https://openalex.org/W2883920103","https://openalex.org/W2923014074","https://openalex.org/W2946355854","https://openalex.org/W2963015836","https://openalex.org/W2963122961","https://openalex.org/W2963367920","https://openalex.org/W2963679555","https://openalex.org/W2979826702","https://openalex.org/W2998617917","https://openalex.org/W3034940165","https://openalex.org/W3043504674","https://openalex.org/W3092209569","https://openalex.org/W3100985894","https://openalex.org/W3208633927","https://openalex.org/W4308083739","https://openalex.org/W4366341968","https://openalex.org/W4387494819","https://openalex.org/W4389519100"],"related_works":["https://openalex.org/W4327546585","https://openalex.org/W2411923897","https://openalex.org/W4394546135","https://openalex.org/W4285347720","https://openalex.org/W4200259850","https://openalex.org/W2333831899","https://openalex.org/W2484894494","https://openalex.org/W2367385042","https://openalex.org/W4381186982","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0],"scale":[1],"of":[2,55,145,157,227],"large":[3],"language":[4],"models":[5],"(LLMs)":[6],"has":[7],"steadily":[8],"increased":[9],"over":[10],"time,":[11],"leading":[12,99],"to":[13,34,52,61,65,100,191],"enhanced":[14],"performance":[15,243],"in":[16,177],"multi-modal":[17],"understanding":[18],"and":[19,37,46,97,104,129,171,196,210,231,245],"complex":[20],"reasoning,":[21],"but":[22],"with":[23,82,184,206,241],"significant":[24],"execution":[25],"overhead":[26,39],"on":[27,147],"hardware.":[28],"Quantization":[29],"is":[30,49,122,189],"a":[31,90,123,167,178,185],"promising":[32],"approach":[33],"reduce":[35,142,192],"computation":[36],"memory":[38,106],"for":[40,94,115,126,213],"LLM":[41,117,219,229],"deployment.":[42],"However,":[43,86],"maintaining":[44],"accuracy":[45,63],"efficiency":[47],"simultaneously":[48],"challenging":[50],"due":[51,64],"the":[53,75,79,134,138,143,148,152,158,193,197,202,225,232,236],"presence":[54],"outliers.":[56],"Moreover,":[57],"low-bit":[58,116,218,228],"quantization":[59,118,153,222],"tends":[60],"deteriorate":[62],"its":[66],"limited":[67],"precision.":[68],"Existing":[69],"outlier-aware":[70],"quantization/hardware":[71],"co-design":[72],"methods":[73],"split":[74],"sparse":[76],"outliers":[77,128,146],"from":[78,137],"normal":[80,95,130,135,149],"values":[81,96,136,150,173],"dedicated":[83],"encoding":[84,183,194],"schemes.":[85],"such":[87],"separation":[88],"produces":[89],"non-uniform":[91],"data":[92,113],"format":[93,114],"outliers,":[98,139],"additional":[101],"hardware":[102,198],"design":[103,240],"inefficient":[105],"access.":[107],"This":[108],"paper":[109],"presents":[110],"an":[111],"outlier-isolated":[112],"called":[119],"Oiso.":[120],"Oiso":[121,161,172,203,207,214,221,233],"unified":[124],"representation":[125],"both":[127],"values.":[131],"It":[132],"isolates":[133],"which":[140],"can":[141,163,174,223],"impact":[144],"during":[151],"process.":[154],"Taking":[155],"advantage":[156],"uniform":[159],"format,":[160],"arithmetic":[162],"be":[164,175],"performed":[165],"using":[166],"homogeneous":[168],"computational":[169],"unit,":[170],"stored":[176],"standardized":[179],"format.":[180],"Hierarchical":[181],"block":[182],"subblock":[186],"alignment":[187],"scheme":[188],"introduced":[190],"cost":[195],"overhead.":[199],"We":[200],"introduce":[201],"architecture,":[204],"equipped":[205],"processing":[208],"elements":[209],"encoders":[211],"tailored":[212],"arithmetic,":[215],"realizing":[216],"efficient":[217],"inference.":[220],"push":[224],"limits":[226],"quantization,":[230],"accelerator":[234,239],"outperforms":[235],"state-of-the-art":[237],"outlieraware":[238],"1.26\u00d7":[242],"improvement":[244],"25%":[246],"energy":[247],"reduction.":[248]},"counts_by_year":[],"updated_date":"2026-01-24T23:23:39.755997","created_date":"2025-10-10T00:00:00"}
