{"id":"https://openalex.org/W2979340153","doi":"https://doi.org/10.1145/3352460.3358307","title":"NVBit","display_name":"NVBit","publication_year":2019,"publication_date":"2019-10-11","ids":{"openalex":"https://openalex.org/W2979340153","doi":"https://doi.org/10.1145/3352460.3358307","mag":"2979340153"},"language":"en","primary_location":{"id":"doi:10.1145/3352460.3358307","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3352460.3358307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111373927","display_name":"Oreste Villa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oreste Villa","raw_affiliation_strings":["NVIDIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015391591","display_name":"Mark W. Stephenson","orcid":"https://orcid.org/0000-0002-1350-0165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark Stephenson","raw_affiliation_strings":["NVIDIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031781240","display_name":"David Nellans","orcid":"https://orcid.org/0000-0001-5203-8367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Nellans","raw_affiliation_strings":["NVIDIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063354509","display_name":"Stephen W. Keckler","orcid":"https://orcid.org/0000-0001-6701-6099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["NVIDIA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":16.0423,"has_fulltext":false,"cited_by_count":159,"citation_normalized_percentile":{"value":0.99607496,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"372","last_page":"383"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11005","display_name":"Radiation Effects in Electronics","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8301180601119995},{"id":"https://openalex.org/keywords/instrumentation","display_name":"Instrumentation (computer programming)","score":0.8247637152671814},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.7732797265052795},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6339774131774902},{"id":"https://openalex.org/keywords/pascal","display_name":"Pascal (unit)","score":0.552994966506958},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4485010504722595},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.4337317645549774},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.39545539021492004},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.38141578435897827},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.358271062374115},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3580514192581177},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.27012813091278076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8301180601119995},{"id":"https://openalex.org/C118530786","wikidata":"https://www.wikidata.org/wiki/Q1134732","display_name":"Instrumentation (computer programming)","level":2,"score":0.8247637152671814},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.7732797265052795},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6339774131774902},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.552994966506958},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4485010504722595},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.4337317645549774},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.39545539021492004},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.38141578435897827},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.358271062374115},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3580514192581177},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.27012813091278076},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3352460.3358307","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3352460.3358307","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W753012316","https://openalex.org/W1483717008","https://openalex.org/W1562102969","https://openalex.org/W1789336918","https://openalex.org/W1957875051","https://openalex.org/W1984222112","https://openalex.org/W1988070283","https://openalex.org/W1989773959","https://openalex.org/W2005795572","https://openalex.org/W2019326155","https://openalex.org/W2040722951","https://openalex.org/W2047226031","https://openalex.org/W2098290747","https://openalex.org/W2099891946","https://openalex.org/W2102674270","https://openalex.org/W2105672886","https://openalex.org/W2115824361","https://openalex.org/W2132635145","https://openalex.org/W2134633067","https://openalex.org/W2151551151","https://openalex.org/W2155893237","https://openalex.org/W2155943969","https://openalex.org/W2156858199","https://openalex.org/W2419597278","https://openalex.org/W2507749557","https://openalex.org/W2610467942","https://openalex.org/W2735162286","https://openalex.org/W2758843708","https://openalex.org/W2921866009","https://openalex.org/W4233556486","https://openalex.org/W4239813889","https://openalex.org/W4248649658"],"related_works":["https://openalex.org/W3062287","https://openalex.org/W2380390332","https://openalex.org/W2742145873","https://openalex.org/W4245975140","https://openalex.org/W2062253548","https://openalex.org/W4225414539","https://openalex.org/W4289522463","https://openalex.org/W1977763331","https://openalex.org/W2103136046","https://openalex.org/W2022120297"],"abstract_inverted_index":{"Binary":[0],"instrumentation":[1,20,47,69,76],"frameworks":[2],"are":[3,27,53],"widely":[4],"used":[5],"to":[6,74,85,107,119,135],"implement":[7],"profilers,":[8],"performance":[9],"evaluation,":[10],"error":[11],"checking,":[12],"and":[13,25,66,80,88,160,175,177],"bug":[14],"detection":[15],"tools.":[16],"While":[17],"dynamic":[18,95,145],"binary":[19,68],"tools":[21,77],"such":[22],"as":[23],"PIN":[24],"DynamoRio":[26],"supported":[28],"on":[29,91,179],"CPUs,":[30],"GPU":[31,103,126,168],"architectures":[32],"currently":[33],"only":[34],"have":[35,120],"limited":[36],"support":[37],"for":[38,55],"similar":[39],"capabilities":[40],"through":[41],"static":[42],"compile-time":[43],"tools,":[44],"which":[45],"prohibits":[46],"of":[48,123,140,147,154],"dynamically":[49],"loaded":[50],"libraries":[51,89],"that":[52,71,83],"foundations":[54],"modern":[56],"high-performance":[57],"applications.":[58],"This":[59],"work":[60],"presents":[61],"NVBit,":[62],"a":[63],"fast,":[64],"dynamic,":[65],"portable,":[67],"framework,":[70],"allows":[72,129],"users":[73],"write":[75],"in":[78],"CUDA/C/C++":[79],"selectively":[81],"apply":[82],"functionality":[84],"pre-compiled":[86,181],"binaries":[87],"executing":[90],"NVIDIA":[92,167],"GPUs.":[93],"Using":[94],"recompilation":[96],"at":[97],"the":[98,116,124,136],"SASS":[99],"level,":[100],"NVBit":[101,128,163],"analyzes":[102],"kernel":[104],"register":[105,155],"requirements":[106],"generate":[108],"efficient":[109],"ABI":[110],"compliant":[111],"instrumented":[112,148],"code":[113,158],"without":[114],"requiring":[115],"tool":[117],"developer":[118],"detailed":[121],"knowledge":[122],"underlying":[125],"architecture.":[127],"basic-block":[130],"instrumentation,":[131],"multiple":[132],"function":[133],"injections":[134],"same":[137],"location,":[138],"inspection":[139],"all":[141,165],"ISA":[142],"visible":[143],"state,":[144,156],"selection":[146],"or":[149,185],"uninstrumented":[150],"code,":[151],"permanent":[152],"modification":[153],"source":[157],"correlation,":[159],"instruction":[161],"removal.":[162],"supports":[164],"recent":[166],"architecture":[169],"families":[170],"including":[171],"Kepler,":[172],"Maxwell,":[173],"Pascal":[174],"Volta":[176],"works":[178],"any":[180],"CUDA,":[182],"OpenACC,":[183],"OpenCL,":[184],"CUDA-Fortran":[186],"application.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":11},{"year":2025,"cited_by_count":35},{"year":2024,"cited_by_count":22},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":20},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":16}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2019-10-18T00:00:00"}
