{"id":"https://openalex.org/W4389072746","doi":"https://doi.org/10.48550/arxiv.2311.14114","title":"SONIQ: System-Optimized Noise-Injected Ultra-Low-Precision Quantization with Full-Precision Parity","display_name":"SONIQ: System-Optimized Noise-Injected Ultra-Low-Precision Quantization with Full-Precision Parity","publication_year":2023,"publication_date":"2023-11-23","ids":{"openalex":"https://openalex.org/W4389072746","doi":"https://doi.org/10.48550/arxiv.2311.14114"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2311.14114","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.14114","pdf_url":"https://arxiv.org/pdf/2311.14114","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2311.14114","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101252052","display_name":"Cyrus Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhou, Cyrus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103140730","display_name":"Vaughn Richard","orcid":"https://orcid.org/0009-0003-1048-9532"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Savarese, Pedro","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085741578","display_name":"Pedro Savarese","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hassman, Zack","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093356443","display_name":"Zachary Hassman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Richard, Vaughn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001371942","display_name":"Michael Maire","orcid":"https://orcid.org/0000-0002-9778-6673"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"DiBrino, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093356444","display_name":"Michael DiBrino","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maire, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101989053","display_name":"Yanjing Li","orcid":"https://orcid.org/0000-0003-0124-0463"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Yanjing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5101252052"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.965499997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7930878400802612},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5795553922653198},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5507845878601074},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5320703983306885},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.49936938285827637},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4965687394142151},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.48895108699798584},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.432742178440094},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4136267900466919},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.36070123314857483},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.35846009850502014},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3580073118209839},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3125978708267212},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.139369934797287},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12461495399475098}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7930878400802612},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5795553922653198},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5507845878601074},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5320703983306885},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.49936938285827637},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4965687394142151},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.48895108699798584},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.432742178440094},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4136267900466919},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.36070123314857483},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35846009850502014},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3580073118209839},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3125978708267212},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.139369934797287},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12461495399475098},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2311.14114","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.14114","pdf_url":"https://arxiv.org/pdf/2311.14114","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2311.14114","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2311.14114","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2311.14114","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.14114","pdf_url":"https://arxiv.org/pdf/2311.14114","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.8999999761581421}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2111062328","https://openalex.org/W2108386578","https://openalex.org/W1572417577","https://openalex.org/W91363257","https://openalex.org/W2142450926","https://openalex.org/W3139655666","https://openalex.org/W2118185763","https://openalex.org/W3183118997","https://openalex.org/W1550409889","https://openalex.org/W2000001288"],"abstract_inverted_index":{"Ultra-low-precision":[0],"inference":[1],"can":[2],"sharply":[3],"reduce":[4],"memory":[5],"and":[6,12,33,67,74,97,127,149,184],"latency":[7],"but":[8],"often":[9],"degrades":[10],"accuracy":[11,164],"relies":[13],"on":[14,175],"specialized":[15],"hardware.":[16],"We":[17],"present":[18],"SONIQ,":[19],"a":[20],"system-optimized,":[21],"noise-injected":[22],"quantization":[23,47,182],"framework":[24,158],"that":[25,116],"learns":[26],"per-channel":[27,118],"mixed":[28],"precision":[29,119],"for":[30],"both":[31],"weights":[32],"activations":[34],"while":[35,78,172],"training":[36],"under":[37,165],"the":[38,55,124,130,144,156,179],"same":[39],"rules":[40],"used":[41,58],"at":[42,59,137],"inference.":[43,187],"By":[44],"injecting":[45],"hardware-calibrated":[46],"noise":[48],"during":[49],"training,":[50],"SONIQ":[51,69,86,154],"steers":[52],"models":[53],"toward":[54],"discrete":[56],"arithmetic":[57],"deployment":[60],"--":[61,121,133],"without":[62],"bespoke":[63],"runtimes.":[64],"Across":[65],"CNNs":[66],"Transformers,":[68],"achieves":[70],"up":[71,88,98],"to":[72,89,99,110,159],"16x":[73],"7x":[75],"compression,":[76],"respectively,":[77],"matching":[79],"or":[80,161],"exceeding":[81],"full-precision":[82,163],"accuracy.":[83],"Measured":[84],"end-to-end,":[85],"delivers":[87],"7.3x":[90],"CPU":[91],"speedup":[92,108],"over":[93],"strong":[94],"INT8":[95],"baselines":[96],"6.3x":[100],"(vector":[101],"units)":[102],"/":[103],"2.8x":[104],"(tensor":[105],"cores)":[106],"GPU":[107],"relative":[109],"FP16.":[111],"A":[112],"practical":[113],"outcome":[114],"is":[115,155],"two":[117],"levels":[120],"one":[122,128,142],"in":[123,129,135],"1--4-bit":[125],"range":[126,132],"4--8-bit":[131],"suffice":[134],"practice;":[136],"inference,":[138],"each":[139],"channel":[140],"selects":[141],"of":[143],"two,":[145],"keeping":[146],"kernels":[147],"simple":[148],"fast.":[150],"To":[151],"our":[152],"knowledge,":[153],"first":[157],"reach":[160],"surpass":[162],"ultra-low":[166],"(1--4":[167],"bits":[168],"per":[169],"parameter)":[170],"regimes":[171],"remaining":[173],"deployable":[174],"commodity":[176],"hardware,":[177],"narrowing":[178],"gap":[180],"between":[181],"theory":[183],"practical,":[185],"high-throughput":[186]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-12T23:11:45.498971","created_date":"2023-11-28T00:00:00"}
