{"id":"https://openalex.org/W3139307480","doi":"https://doi.org/10.1109/cgo51591.2021.9370335","title":"Unleashing the Low-Precision Computation Potential of Tensor Cores on GPUs","display_name":"Unleashing the Low-Precision Computation Potential of Tensor Cores on GPUs","publication_year":2021,"publication_date":"2021-02-27","ids":{"openalex":"https://openalex.org/W3139307480","doi":"https://doi.org/10.1109/cgo51591.2021.9370335","mag":"3139307480"},"language":"en","primary_location":{"id":"doi:10.1109/cgo51591.2021.9370335","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo51591.2021.9370335","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100607669","display_name":"Guangli Li","orcid":"https://orcid.org/0000-0002-9738-261X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangli Li","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024664385","display_name":"Jingling Xue","orcid":"https://orcid.org/0000-0003-0380-3506"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jingling Xue","raw_affiliation_strings":["School of Computer Science and Engineering, University of New South Wales, Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of New South Wales, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100349661","display_name":"Lei Liu","orcid":"https://orcid.org/0000-0003-4854-7382"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Liu","raw_affiliation_strings":["SKL of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"SKL of Computer Architecture, Institute of Computing Technology, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100425338","display_name":"Xueying Wang","orcid":"https://orcid.org/0000-0002-7835-113X"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueying Wang","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101243381","display_name":"Xiu Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]},{"id":"https://openalex.org/I4210134929","display_name":"Jilin Province Science and Technology Department","ror":"https://ror.org/049x38272","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210134929"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu Ma","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, China","institution_ids":["https://openalex.org/I4210134929","https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082181196","display_name":"Xiao Dong","orcid":"https://orcid.org/0000-0003-3593-2249"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Dong","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083485818","display_name":"Jiansong Li","orcid":"https://orcid.org/0000-0002-2924-5189"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiansong Li","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053070701","display_name":"Xiaobing Feng","orcid":"https://orcid.org/0000-0003-2909-7750"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobing Feng","raw_affiliation_strings":["School of Computer Science and Technology, University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100607669"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":2.0725,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86336384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"90","last_page":"102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.7596559524536133},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7523757219314575},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.690244734287262},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.49636775255203247},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.4489075243473053},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.44253861904144287},{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.43618929386138916},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.43237653374671936},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4259081184864044},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3447381258010864},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.297161728143692},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1279757022857666}],"concepts":[{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.7596559524536133},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7523757219314575},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.690244734287262},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.49636775255203247},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.4489075243473053},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.44253861904144287},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.43618929386138916},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.43237653374671936},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4259081184864044},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3447381258010864},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.297161728143692},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1279757022857666},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cgo51591.2021.9370335","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cgo51591.2021.9370335","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE/ACM International Symposium on Code Generation and Optimization (CGO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8999999761581421,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W1128809682","https://openalex.org/W1724438581","https://openalex.org/W1988201922","https://openalex.org/W2048266589","https://openalex.org/W2097117768","https://openalex.org/W2108598243","https://openalex.org/W2119144962","https://openalex.org/W2152839228","https://openalex.org/W2157881433","https://openalex.org/W2162322364","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2207050309","https://openalex.org/W2233116163","https://openalex.org/W2267635276","https://openalex.org/W2319920447","https://openalex.org/W2402144811","https://openalex.org/W2565851976","https://openalex.org/W2593245696","https://openalex.org/W2606722458","https://openalex.org/W2748818695","https://openalex.org/W2765439756","https://openalex.org/W2783538964","https://openalex.org/W2787884921","https://openalex.org/W2791673912","https://openalex.org/W2796649226","https://openalex.org/W2883920103","https://openalex.org/W2895305554","https://openalex.org/W2899771611","https://openalex.org/W2901549770","https://openalex.org/W2904180680","https://openalex.org/W2907701003","https://openalex.org/W2911591717","https://openalex.org/W2919115771","https://openalex.org/W2920798074","https://openalex.org/W2953384591","https://openalex.org/W2963122961","https://openalex.org/W2963257194","https://openalex.org/W2963989532","https://openalex.org/W2964017942","https://openalex.org/W2964228333","https://openalex.org/W2964299589","https://openalex.org/W2982041622","https://openalex.org/W2989126191","https://openalex.org/W2996843284","https://openalex.org/W2997106510","https://openalex.org/W2998218113","https://openalex.org/W2999942548","https://openalex.org/W3000589962","https://openalex.org/W3101543398","https://openalex.org/W3104528661","https://openalex.org/W3118608800","https://openalex.org/W3143293593","https://openalex.org/W4240168186","https://openalex.org/W4295262505","https://openalex.org/W6684191040","https://openalex.org/W6693397755","https://openalex.org/W6713134421","https://openalex.org/W6756040250","https://openalex.org/W6757794950","https://openalex.org/W6765813243"],"related_works":["https://openalex.org/W2381242807","https://openalex.org/W2751208925","https://openalex.org/W2347541121","https://openalex.org/W3183118997","https://openalex.org/W3214410901","https://openalex.org/W3204400881","https://openalex.org/W3204296682","https://openalex.org/W2917767146","https://openalex.org/W4301095421","https://openalex.org/W2949390274"],"abstract_inverted_index":{"Tensor-specialized":[0],"hardware":[1,36],"for":[2,76,105],"supporting":[3],"low-precision":[4,87],"arithmetic":[5],"has":[6],"become":[7],"an":[8],"inevitable":[9],"trend":[10],"due":[11],"to":[12,39,116],"the":[13,41,56,107,139],"ever-increasing":[14],"demand":[15],"on":[16,34,91],"computational":[17,52],"capability":[18],"and":[19,114,125],"energy":[20],"efficiency":[21],"in":[22,45,59],"intelligent":[23],"applications.":[24],"The":[25,95],"main":[26],"challenge":[27,69],"faced":[28],"when":[29],"accelerating":[30,77],"a":[31,60,73,99],"tensor":[32,79,84,89],"program":[33],"tensor-specialized":[35],"is":[37,98],"how":[38],"achieve":[40,133],"best":[42],"performance":[43,113,135],"possible":[44],"reduced":[46],"precision":[47,57,102,115,140],"by":[48,70,81],"fully":[49],"utilizing":[50],"its":[51,83],"resources":[53],"while":[54,137],"keeping":[55],"loss":[58,141],"controlled":[61],"manner.":[62],"In":[63],"this":[64,68],"paper,":[65],"we":[66],"address":[67],"proposing":[71],"QUANTENSOR,":[72],"new":[74,100],"approach":[75],"general-purpose":[78],"programs":[80],"replacing":[82],"computations":[85,90],"with":[86,120],"quantized":[88],"NVIDIA":[92],"Tensor":[93],"Cores.":[94],"key":[96],"novelty":[97],"residual-based":[101],"refinement":[103],"technique":[104],"controlling":[106],"quantization":[108],"errors,":[109],"allowing":[110],"tradeoffs":[111],"between":[112],"be":[117],"made.":[118],"Evaluation":[119],"GEMM,":[121],"deep":[122],"neural":[123],"networks,":[124],"linear":[126],"algebra":[127],"applications":[128],"shows":[129],"that":[130],"QUANTENSOR":[131],"can":[132],"remarkable":[134],"improvements":[136],"reducing":[138],"incurred":[142],"significantly":[143],"at":[144],"acceptable":[145],"overheads.":[146]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
