{"id":"https://openalex.org/W2907166042","doi":"https://doi.org/10.1109/sips.2018.8598402","title":"Fast Quantized Arithmetic on x86: Trading Compute for Data Movement","display_name":"Fast Quantized Arithmetic on x86: Trading Compute for Data Movement","publication_year":2018,"publication_date":"2018-10-01","ids":{"openalex":"https://openalex.org/W2907166042","doi":"https://doi.org/10.1109/sips.2018.8598402","mag":"2907166042"},"language":"en","primary_location":{"id":"doi:10.1109/sips.2018.8598402","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sips.2018.8598402","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Workshop on Signal Processing Systems (SiPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027784711","display_name":"Alen Stojanov","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Alen Stojanov","raw_affiliation_strings":["Department of Computer Science, ETH Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, ETH Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111393098","display_name":"Tyler Smith","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Tyler Michael Smith","raw_affiliation_strings":["Department of Computer Science, ETH Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, ETH Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083822059","display_name":"Dan Alistarh","orcid":"https://orcid.org/0000-0003-3650-940X"},"institutions":[{"id":"https://openalex.org/I157556583","display_name":"Institute of Science and Technology Austria","ror":"https://ror.org/03gnh5541","country_code":"AT","type":"education","lineage":["https://openalex.org/I157556583"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Dan Alistarh","raw_affiliation_strings":["IST Austria, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"IST Austria, Vienna, Austria","institution_ids":["https://openalex.org/I157556583"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076407181","display_name":"Markus P\u00fcschel","orcid":"https://orcid.org/0000-0001-8834-8551"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Markus Puschel","raw_affiliation_strings":["Department of Computer Science, ETH Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, ETH Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5027784711"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":1.0134,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.74773834,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"349","last_page":"354"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.7898916006088257},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7585790753364563},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7350070476531982},{"id":"https://openalex.org/keywords/mnist-database","display_name":"MNIST database","score":0.6367590427398682},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5962127447128296},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.5520859956741333},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.529219925403595},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5099226236343384},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.4854472279548645},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4791378676891327},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4325740933418274},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.43202245235443115},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.3988284766674042},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.34654584527015686},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.19816157221794128},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16403019428253174},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1218779981136322},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.11745226383209229},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.09903585910797119}],"concepts":[{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.7898916006088257},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7585790753364563},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7350070476531982},{"id":"https://openalex.org/C190502265","wikidata":"https://www.wikidata.org/wiki/Q17069496","display_name":"MNIST database","level":3,"score":0.6367590427398682},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5962127447128296},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.5520859956741333},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.529219925403595},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5099226236343384},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.4854472279548645},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4791378676891327},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4325740933418274},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.43202245235443115},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3988284766674042},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.34654584527015686},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.19816157221794128},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16403019428253174},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1218779981136322},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.11745226383209229},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.09903585910797119},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sips.2018.8598402","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sips.2018.8598402","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Workshop on Signal Processing Systems (SiPS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W182164380","https://openalex.org/W809736386","https://openalex.org/W1902934009","https://openalex.org/W1980454827","https://openalex.org/W2124295105","https://openalex.org/W2145096794","https://openalex.org/W2164452299","https://openalex.org/W2296616510","https://openalex.org/W2300242332","https://openalex.org/W2402144811","https://openalex.org/W2647836899","https://openalex.org/W2732291649","https://openalex.org/W2741269719","https://openalex.org/W2769644379","https://openalex.org/W2787806302","https://openalex.org/W2953384591","https://openalex.org/W2963114950","https://openalex.org/W2963322354","https://openalex.org/W2964003909","https://openalex.org/W3141595720","https://openalex.org/W4240267682","https://openalex.org/W4250955649","https://openalex.org/W4300263211","https://openalex.org/W6639703010","https://openalex.org/W6698200048","https://openalex.org/W6713134421","https://openalex.org/W6741986022","https://openalex.org/W6746200960"],"related_works":["https://openalex.org/W2129537883","https://openalex.org/W2911551207","https://openalex.org/W4225987401","https://openalex.org/W4236526691","https://openalex.org/W4226140811","https://openalex.org/W2003690377","https://openalex.org/W4312862090","https://openalex.org/W4226502243","https://openalex.org/W2079351402","https://openalex.org/W4226248541"],"abstract_inverted_index":{"We":[0],"introduce":[1],"Clover,":[2],"a":[3,134],"new":[4],"library":[5,25],"for":[6,71,116],"efficient":[7],"computation":[8],"using":[9,62],"low-precision":[10],"data,":[11],"providing":[12],"mathematical":[13],"routines":[14],"required":[15],"by":[16,127,131],"fundamental":[17],"methods":[18],"in":[19,97],"optimization":[20],"and":[21,38,86,118],"sparse":[22],"recovery.":[23],"Our":[24],"faithfully":[26],"implements":[27],"variants":[28],"of":[29,68,108,123],"stochastic":[30],"quantization":[31],"that":[32,56,92],"guarantee":[33],"convergence":[34],"at":[35],"low":[36],"precision,":[37],"supports":[39],"data":[40,73,113],"formats":[41],"from":[42],"4-bit":[43,57,128],"quantized":[44],"to":[45,101,105,111],"32-bit":[46],"IEEE-754":[47],"on":[48],"current":[49],"Intel":[50,63],"processors.":[51],"In":[52],"particular,":[53],"we":[54,120],"show":[55,121],"can":[58],"be":[59],"implemented":[60],"efficiently":[61],"AVX":[64],"despite":[65],"the":[66,93,106],"lack":[67],"native":[69],"support":[70],"this":[72],"format.":[74],"Experimental":[75],"results":[76],"with":[77,103],"dot":[78],"product,":[79],"matrix-vector":[80],"multiplication":[81],"(MVM),":[82],"gradient":[83],"descent":[84],"(GD),":[85],"iterative":[87],"hard":[88],"thresholding":[89],"(IHT)":[90],"demonstrate":[91],"attainable":[94],"speedups":[95],"are":[96],"many":[98],"cases":[99],"close":[100],"linear":[102],"respect":[104],"reduction":[107],"precision":[109],"due":[110],"reduced":[112],"movement.":[114],"Finally,":[115],"GD":[117],"IHT,":[119],"examples":[122],"absolute":[124],"speedup":[125],"achieved":[126],"versus":[129],"32-bit,":[130],"iterating":[132],"until":[133],"given":[135],"target":[136],"error":[137],"is":[138],"achieved.":[139]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
