{"id":"https://openalex.org/W2971610910","doi":"https://doi.org/10.1109/hpec.2019.8916466","title":"Low Overhead Instruction Latency Characterization for NVIDIA GPGPUs","display_name":"Low Overhead Instruction Latency Characterization for NVIDIA GPGPUs","publication_year":2019,"publication_date":"2019-09-01","ids":{"openalex":"https://openalex.org/W2971610910","doi":"https://doi.org/10.1109/hpec.2019.8916466","mag":"2971610910"},"language":"en","primary_location":{"id":"doi:10.1109/hpec.2019.8916466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2019.8916466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1905.08778","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001017727","display_name":"Yehia Arafa","orcid":"https://orcid.org/0000-0002-5186-3012"},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yehia Arafa","raw_affiliation_strings":["Klipsch School of ECE, New Mexico State University, Las Cruces, NM, USA","New Mexico State University,Klipsch School of ECE,Las Cruces,NM,USA"],"affiliations":[{"raw_affiliation_string":"Klipsch School of ECE, New Mexico State University, Las Cruces, NM, USA","institution_ids":["https://openalex.org/I10052268"]},{"raw_affiliation_string":"New Mexico State University,Klipsch School of ECE,Las Cruces,NM,USA","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087778749","display_name":"Abdel\u2010Hameed A. Badawy","orcid":"https://orcid.org/0000-0001-8027-1449"},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdel-Hameed A. Badawy","raw_affiliation_strings":["Klipsch School of ECE, New Mexico State University, Las Cruces, NM, USA","New Mexico State University,Klipsch School of ECE,Las Cruces,NM,USA"],"affiliations":[{"raw_affiliation_string":"Klipsch School of ECE, New Mexico State University, Las Cruces, NM, USA","institution_ids":["https://openalex.org/I10052268"]},{"raw_affiliation_string":"New Mexico State University,Klipsch School of ECE,Las Cruces,NM,USA","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063725454","display_name":"Gopinath Chennupati","orcid":"https://orcid.org/0000-0002-6223-8570"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gopinath Chennupati","raw_affiliation_strings":["Los Alamos National Laboratory, Los Alamos, NM, USA","Los Alamos National Laboratory, Los Alamos, NM USA"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM, USA","institution_ids":["https://openalex.org/I1343871089"]},{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM USA","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053809539","display_name":"Nandakishore Santhi","orcid":"https://orcid.org/0000-0002-4755-7821"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nandakishore Santhi","raw_affiliation_strings":["Los Alamos National Laboratory, Los Alamos, NM, USA","Los Alamos National Laboratory, Los Alamos, NM USA"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM, USA","institution_ids":["https://openalex.org/I1343871089"]},{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM USA","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073911119","display_name":"Stephan Eidenbenz","orcid":"https://orcid.org/0000-0002-2628-1854"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephan Eidenbenz","raw_affiliation_strings":["Los Alamos National Laboratory, Los Alamos, NM, USA","Los Alamos National Laboratory, Los Alamos, NM USA"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM, USA","institution_ids":["https://openalex.org/I1343871089"]},{"raw_affiliation_string":"Los Alamos National Laboratory, Los Alamos, NM USA","institution_ids":["https://openalex.org/I1343871089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5001017727"],"corresponding_institution_ids":["https://openalex.org/I10052268"],"apc_list":null,"apc_paid":null,"fwci":0.246,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.50369709,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8830727338790894},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7745920419692993},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6764153242111206},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.629718542098999},{"id":"https://openalex.org/keywords/pascal","display_name":"Pascal (unit)","score":0.626092255115509},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5711941719055176},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.5119927525520325},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4830487370491028},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.43136849999427795},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4093644320964813},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3077754080295563},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13666880130767822}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8830727338790894},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7745920419692993},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6764153242111206},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.629718542098999},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.626092255115509},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5711941719055176},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.5119927525520325},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4830487370491028},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.43136849999427795},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4093644320964813},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3077754080295563},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13666880130767822}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/hpec.2019.8916466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec.2019.8916466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1905.08778","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.08778","pdf_url":"https://arxiv.org/pdf/1905.08778","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2971610910","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1905.08778.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1905.08778","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1905.08778","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1905.08778","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1905.08778","pdf_url":"https://arxiv.org/pdf/1905.08778","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5299999713897705,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2971610910.pdf","grobid_xml":"https://content.openalex.org/works/W2971610910.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W645173616","https://openalex.org/W1902930330","https://openalex.org/W1979527452","https://openalex.org/W1984222112","https://openalex.org/W2017579069","https://openalex.org/W2026764611","https://openalex.org/W2063186542","https://openalex.org/W2090593986","https://openalex.org/W2093683727","https://openalex.org/W2093843662","https://openalex.org/W2098290747","https://openalex.org/W2100218206","https://openalex.org/W2110195531","https://openalex.org/W2129232868","https://openalex.org/W2129806488","https://openalex.org/W2163687928","https://openalex.org/W2183420951","https://openalex.org/W2554131156","https://openalex.org/W2554342856","https://openalex.org/W2580538010","https://openalex.org/W2766338242","https://openalex.org/W2788201686","https://openalex.org/W2789572737","https://openalex.org/W2796649226","https://openalex.org/W2798296237","https://openalex.org/W2805203449","https://openalex.org/W2906684012","https://openalex.org/W2915018956","https://openalex.org/W2921788688","https://openalex.org/W2963112338","https://openalex.org/W3141650078","https://openalex.org/W6673829052","https://openalex.org/W6686093555","https://openalex.org/W6745245109","https://openalex.org/W6750309632","https://openalex.org/W6750448596"],"related_works":["https://openalex.org/W2991330024","https://openalex.org/W1491341706","https://openalex.org/W2484342675","https://openalex.org/W160245670","https://openalex.org/W2906684012","https://openalex.org/W2000438057","https://openalex.org/W2185243947","https://openalex.org/W2770954455","https://openalex.org/W3157374924","https://openalex.org/W2017579069","https://openalex.org/W2987884568","https://openalex.org/W1531300964","https://openalex.org/W2805203449","https://openalex.org/W2769646054","https://openalex.org/W2015048865","https://openalex.org/W1982123268","https://openalex.org/W2401716846","https://openalex.org/W2156149687","https://openalex.org/W27540008","https://openalex.org/W1590584831"],"abstract_inverted_index":{"The":[0,142],"last":[1],"decade":[2],"has":[3,15],"seen":[4],"a":[5,69],"shift":[6],"in":[7,25,84,98,144,164],"the":[8,46,52,78,85,89,93,101,107,110,113,119,156,166],"computer":[9],"systems":[10],"industry":[11],"where":[12],"heterogeneous":[13],"computing":[14,42],"become":[16],"prevalent.":[17],"Graphics":[18],"Processing":[19],"Units":[20],"(GPUs)":[21],"are":[22,33],"now":[23],"present":[24],"supercomputers":[26],"to":[27,44,150,176],"mobile":[28],"phones":[29],"and":[30,73,88,140],"tablets.":[31],"GPUs":[32,99,131],"used":[34],"for":[35,76],"graphics":[36],"operations":[37],"as":[38,40],"well":[39],"general-purpose":[41],"(GPGPUs)":[43],"boost":[45],"performance":[47],"of":[48,54,80,92,109,155,158],"compute-intensive":[49],"applications.":[50,178],"However,":[51],"percentage":[53],"undisclosed":[55],"characteristics":[56],"beyond":[57],"what":[58],"vendors":[59],"provide":[60],"is":[61],"not":[62],"small.":[63],"In":[64],"this":[65,145],"paper,":[66],"we":[67,105],"introduce":[68],"very":[70],"low":[71],"overhead":[72,91],"portable":[74],"analysis":[75],"exposing":[77],"latency":[79],"each":[81],"instruction":[82],"executing":[83],"GPU":[86],"pipeline(s)":[87],"access":[90],"various":[94,111,120],"memory":[95],"hierarchies":[96],"found":[97],"at":[100],"micro-architecture":[102],"level.":[103],"Furthermore,":[104],"show":[106],"impact":[108],"optimizations":[112,175],"CUDA":[114],"compiler":[115],"can":[116,147,172],"perform":[117,123,173],"over":[118],"latencies.":[121],"We":[122],"our":[124],"evaluation":[125],"on":[126],"seven":[127],"different":[128,134],"high-end":[129],"NVIDIA":[130],"from":[132],"five":[133],"generations/architectures:":[135],"Kepler,":[136],"Maxwell,":[137],"Pascal,":[138],"Volta,":[139],"Turing.":[141],"results":[143],"paper":[146],"help":[148,163],"architects":[149],"have":[151],"an":[152],"accurate":[153],"characterization":[154],"latencies":[157],"these":[159],"GPUs,":[160],"which":[161],"will":[162],"modeling":[165],"hardware":[167],"accurately.":[168],"Also,":[169],"software":[170],"developers":[171],"informed":[174],"their":[177]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
