{"id":"https://openalex.org/W2773283032","doi":"https://doi.org/10.1109/iiswc.2017.8167778","title":"Understanding the performance-accuracy tradeoffs of floating-point arithmetic on GPUs","display_name":"Understanding the performance-accuracy tradeoffs of floating-point arithmetic on GPUs","publication_year":2017,"publication_date":"2017-10-01","ids":{"openalex":"https://openalex.org/W2773283032","doi":"https://doi.org/10.1109/iiswc.2017.8167778","mag":"2773283032"},"language":"en","primary_location":{"id":"doi:10.1109/iiswc.2017.8167778","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iiswc.2017.8167778","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Symposium on Workload Characterization (IISWC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088904357","display_name":"Sruthikesh Surineni","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sruthikesh Surineni","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Missouri"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Missouri","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033804516","display_name":"Ruidong Gu","orcid":null},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ruidong Gu","raw_affiliation_strings":["North Carolina State University"],"affiliations":[{"raw_affiliation_string":"North Carolina State University","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109368567","display_name":"Huyen Nguyen","orcid":"https://orcid.org/0009-0004-7100-6458"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huyen Nguyen","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Missouri"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Missouri","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041520129","display_name":"Michela Becchi","orcid":"https://orcid.org/0000-0001-8353-2915"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]},{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michela Becchi","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Missouri","North Carolina State University"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Missouri","institution_ids":["https://openalex.org/I76835614"]},{"raw_affiliation_string":"North Carolina State University","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5088904357"],"corresponding_institution_ids":["https://openalex.org/I76835614"],"apc_list":null,"apc_paid":null,"fwci":0.2271,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.58442357,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"207","last_page":"218"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10363","display_name":"Low-power high-performance VLSI design","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.860393762588501},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.8336869478225708},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.8104546070098877},{"id":"https://openalex.org/keywords/arbitrary-precision-arithmetic","display_name":"Arbitrary-precision arithmetic","score":0.7563667297363281},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7552722692489624},{"id":"https://openalex.org/keywords/saturation-arithmetic","display_name":"Saturation arithmetic","score":0.650139570236206},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6270551085472107},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6072058081626892},{"id":"https://openalex.org/keywords/floating-point-unit","display_name":"Floating-point unit","score":0.6034814119338989},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5896164178848267},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.4931725859642029},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.45453861355781555},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4187050461769104},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35933780670166016},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1749182641506195},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.0931670069694519}],"concepts":[{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.860393762588501},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.8336869478225708},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.8104546070098877},{"id":"https://openalex.org/C83581934","wikidata":"https://www.wikidata.org/wiki/Q527381","display_name":"Arbitrary-precision arithmetic","level":2,"score":0.7563667297363281},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7552722692489624},{"id":"https://openalex.org/C182775192","wikidata":"https://www.wikidata.org/wiki/Q913725","display_name":"Saturation arithmetic","level":3,"score":0.650139570236206},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6270551085472107},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6072058081626892},{"id":"https://openalex.org/C110305270","wikidata":"https://www.wikidata.org/wiki/Q733507","display_name":"Floating-point unit","level":3,"score":0.6034814119338989},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5896164178848267},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.4931725859642029},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.45453861355781555},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4187050461769104},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35933780670166016},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1749182641506195},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.0931670069694519},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iiswc.2017.8167778","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iiswc.2017.8167778","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE International Symposium on Workload Characterization (IISWC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320308928","display_name":"University of Delaware","ror":"https://ror.org/01sbq1a82"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1871270088","https://openalex.org/W1969213662","https://openalex.org/W1971519595","https://openalex.org/W1977336460","https://openalex.org/W2009694692","https://openalex.org/W2012407419","https://openalex.org/W2019045525","https://openalex.org/W2052942864","https://openalex.org/W2061091230","https://openalex.org/W2068214544","https://openalex.org/W2079450792","https://openalex.org/W2081368694","https://openalex.org/W2082349392","https://openalex.org/W2098077956","https://openalex.org/W2103953062","https://openalex.org/W2104380208","https://openalex.org/W2106191211","https://openalex.org/W2110195531","https://openalex.org/W2155673213","https://openalex.org/W2155963553","https://openalex.org/W2169004268","https://openalex.org/W2207050309","https://openalex.org/W2315126270","https://openalex.org/W2480266340","https://openalex.org/W2556862623","https://openalex.org/W2566629575","https://openalex.org/W2899141212","https://openalex.org/W6635250703"],"related_works":["https://openalex.org/W2116803521","https://openalex.org/W2773283032","https://openalex.org/W2363539709","https://openalex.org/W3215589575","https://openalex.org/W1564887326","https://openalex.org/W2797902698","https://openalex.org/W2169016399","https://openalex.org/W2551094850","https://openalex.org/W2930605373","https://openalex.org/W2185787149"],"abstract_inverted_index":{"Floating-point":[0],"computations":[1],"produce":[2],"approximate":[3],"results,":[4],"possibly":[5],"leading":[6,182],"to":[7,30,126,172,175,183],"inaccuracy":[8],"and":[9,34,48,66,77,88,99,108,137,160,193,200],"reproducibility":[10],"problems.":[11],"Existing":[12],"work":[13],"addresses":[14],"two":[15],"issues:":[16],"first,":[17],"the":[18,26,44,58,95,111,122,127,144,176,179,190,201,206],"design":[19],"of":[20,28,36,43,60,86,113,129,147,166,178,203,205],"high":[21],"precision":[22,69,101,181],"floating-point":[23,62,68,79,102,115],"representations;":[24],"second,":[25],"study":[27,42,56,121],"methods":[29],"trade":[31],"off":[32],"accuracy":[33,47],"performance":[35,49],"CPU":[37],"applications.":[38,91],"However,":[39],"a":[40,84,164,184],"comprehensive":[41],"tradeoffs":[45,124],"between":[46],"on":[50,81,104,117,133,151,189],"modern":[51],"GPUs":[52],"is":[53],"missing.":[54],"This":[55],"covers":[57],"use":[59,128,146],"different":[61,105,130,148,157],"precisions":[63,132],"(i.e.,":[64],"single":[65,98],"double":[67,100],"in":[70,197],"IEEE":[71],"754":[72],"standard,":[73],"GNU":[74],"Multiple":[75],"Precision,":[76],"composite":[78],"precision)":[80],"GPU":[82,106],"using":[83],"variety":[85],"synthetic":[87],"real-world":[89],"benchmark":[90,153],"First,":[92],"we":[93,109,120,142,169],"analyze":[94,143],"support":[96],"for":[97],"arithmetic":[103,131,149,161,180,191],"architectures,":[107],"characterize":[110],"latencies":[112],"all":[114],"instructions":[116],"GPU.":[118],"Second,":[119],"performance/accuracy":[123,186],"related":[125],"addition,":[134],"multiplication,":[135],"division,":[136],"natural":[138],"exponential":[139],"function.":[140],"Third,":[141],"combined":[145],"operations":[150,192],"three":[152],"applications":[154],"characterized":[155],"by":[156],"instruction":[158],"mixes":[159],"intensities.":[162],"As":[163],"result":[165],"this":[167],"analysis,":[168],"provide":[170],"insights":[171],"guide":[173],"users":[174],"selection":[177],"good":[185],"tradeoff":[187],"depending":[188],"mathematical":[194],"functions":[195],"used":[196],"their":[198],"program":[199],"degree":[202],"multithreading":[204],"code.":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
