{"id":"https://openalex.org/W3039972296","doi":"https://doi.org/10.1145/3392717.3392761","title":"Fast, accurate, and scalable memory modeling of GPGPUs using reuse profiles","display_name":"Fast, accurate, and scalable memory modeling of GPGPUs using reuse profiles","publication_year":2020,"publication_date":"2020-06-29","ids":{"openalex":"https://openalex.org/W3039972296","doi":"https://doi.org/10.1145/3392717.3392761","mag":"3039972296"},"language":"en","primary_location":{"id":"doi:10.1145/3392717.3392761","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3392717.3392761","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001017727","display_name":"Yehia Arafa","orcid":"https://orcid.org/0000-0002-5186-3012"},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yehia Arafa","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087778749","display_name":"Abdel\u2010Hameed A. Badawy","orcid":"https://orcid.org/0000-0001-8027-1449"},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdel-Hameed Badawy","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063725454","display_name":"Gopinath Chennupati","orcid":"https://orcid.org/0000-0002-6223-8570"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gopinath Chennupati","raw_affiliation_strings":["Los Alamos National Laboratory"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074965377","display_name":"Atanu Barai","orcid":"https://orcid.org/0000-0002-6879-4455"},"institutions":[{"id":"https://openalex.org/I10052268","display_name":"New Mexico State University","ror":"https://ror.org/00hpz7z43","country_code":"US","type":"education","lineage":["https://openalex.org/I10052268"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Atanu Barai","raw_affiliation_strings":["New Mexico State University"],"affiliations":[{"raw_affiliation_string":"New Mexico State University","institution_ids":["https://openalex.org/I10052268"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053809539","display_name":"Nandakishore Santhi","orcid":"https://orcid.org/0000-0002-4755-7821"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nandakishore Santhi","raw_affiliation_strings":["Los Alamos National Laboratory"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory","institution_ids":["https://openalex.org/I1343871089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073911119","display_name":"Stephan Eidenbenz","orcid":"https://orcid.org/0000-0002-2628-1854"},"institutions":[{"id":"https://openalex.org/I1343871089","display_name":"Los Alamos National Laboratory","ror":"https://ror.org/01e41cf67","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I1343871089","https://openalex.org/I198811213","https://openalex.org/I4210120050"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephan Eidenbenz","raw_affiliation_strings":["Los Alamos National Laboratory"],"affiliations":[{"raw_affiliation_string":"Los Alamos National Laboratory","institution_ids":["https://openalex.org/I1343871089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5001017727"],"corresponding_institution_ids":["https://openalex.org/I10052268"],"apc_list":null,"apc_paid":null,"fwci":2.5921,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.89792753,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8919142484664917},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7277650237083435},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.7069637179374695},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6310833096504211},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5595210790634155},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.4838249087333679},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4715048372745514},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.45687049627304077},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.44667360186576843},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.44289225339889526},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.43554559350013733},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3319958448410034},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19224756956100464}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8919142484664917},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7277650237083435},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.7069637179374695},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6310833096504211},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5595210790634155},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.4838249087333679},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4715048372745514},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.45687049627304077},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.44667360186576843},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.44289225339889526},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.43554559350013733},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3319958448410034},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19224756956100464},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3392717.3392761","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3392717.3392761","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},{"id":"mag:3134552883","is_oa":false,"landing_page_url":"https://jglobal.jst.go.jp/en/detail?JGLOBAL_ID=202002237451849258","pdf_url":null,"source":{"id":"https://openalex.org/S4306500161","display_name":"ACM Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"ACM Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.4099999964237213,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1968902482","https://openalex.org/W1972077953","https://openalex.org/W1979527452","https://openalex.org/W2033486618","https://openalex.org/W2062527253","https://openalex.org/W2062840343","https://openalex.org/W2065705265","https://openalex.org/W2079038734","https://openalex.org/W2080592089","https://openalex.org/W2082450110","https://openalex.org/W2098290747","https://openalex.org/W2101320423","https://openalex.org/W2104840748","https://openalex.org/W2109814389","https://openalex.org/W2121288477","https://openalex.org/W2128120785","https://openalex.org/W2134004505","https://openalex.org/W2142444503","https://openalex.org/W2144954274","https://openalex.org/W2153456949","https://openalex.org/W2156858199","https://openalex.org/W2171582069","https://openalex.org/W2574647542","https://openalex.org/W2605443074","https://openalex.org/W2727397546","https://openalex.org/W2777727847","https://openalex.org/W2785384037","https://openalex.org/W2903738101","https://openalex.org/W2921788688","https://openalex.org/W2937249639","https://openalex.org/W2947179428","https://openalex.org/W2979340153","https://openalex.org/W2991330024","https://openalex.org/W2998901775","https://openalex.org/W3006451002","https://openalex.org/W3030672572","https://openalex.org/W3098742859","https://openalex.org/W4238930179","https://openalex.org/W4239813889","https://openalex.org/W4241057782","https://openalex.org/W4245867598","https://openalex.org/W4250244364","https://openalex.org/W4251280768","https://openalex.org/W4299029681"],"related_works":["https://openalex.org/W2769189194","https://openalex.org/W1537323515","https://openalex.org/W2353852602","https://openalex.org/W2120249721","https://openalex.org/W2133682266","https://openalex.org/W2497617944","https://openalex.org/W2167303720","https://openalex.org/W1563139915","https://openalex.org/W2109715593","https://openalex.org/W2061075966"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,180],"introduce":[4],"an":[5],"accurate":[6],"and":[7,73,93,103,107,116,142,153,191],"scalable":[8],"memory":[9,37,49,68,96],"modeling":[10],"framework":[11,110],"for":[12,25,51,162,187,192],"General":[13],"Purpose":[14],"Graphics":[15],"Processor":[16],"units":[17],"(GPGPUs),":[18],"PPT-GPU-Mem.":[19],"That":[20],"is":[21,71],"Performance":[22],"Prediction":[23],"Tool-Kit":[24],"GPUs":[26],"Cache":[27],"Memories.":[28],"PPT-GPU-Mem":[29,82,183],"predicts":[30],"the":[31,60,94,163,173,177,194],"performance":[32,196],"of":[33,197],"different":[34,112,164],"GPUs'":[35],"cache":[36,165,195],"hierarchy":[38],"(L1":[39],"&":[40],"L2)":[41],"based":[42],"on":[43,77],"reuse":[44],"profiles.":[45],"We":[46,98,132],"extract":[47],"a":[48],"trace":[50,69],"each":[52],"GPU":[53,88],"kernel":[54],"once":[55],"in":[56,120,175],"its":[57],"lifetime":[58],"using":[59,111],"recently":[61],"released":[62],"binary":[63],"instrumentation":[64],"tool,":[65],"NVBIT.":[66],"The":[67],"extraction":[70],"architecture-independent":[72],"can":[74,83,184],"be":[75,185],"done":[76],"any":[78,86],"available":[79],"NVIDIA":[80,87],"GPU.":[81],"then":[84],"model":[85,99],"caches":[89],"given":[90],"their":[91],"parameters":[92],"extracted":[95],"trace.":[97],"Volta":[100],"Tesla":[101],"V100":[102],"Turing":[104],"TITAN":[105],"RTX":[106],"validate":[108],"our":[109],"kernels":[113],"from":[114,127,158],"Polybench":[115],"Rodinia":[117],"benchmark":[118,130],"suites":[119],"addition":[121],"to":[122,160,168],"two":[123,134],"deep":[124],"learning":[125],"applications":[126],"Tango":[128],"DNN":[129],"suite.":[131],"provide":[133],"models,":[135],"MBRDP":[136],"(Multiple":[137],"Block":[138,145],"Reuse":[139,146],"Distance":[140,147],"Profile)":[141],"OBRDP":[143],"(One":[144],"Profile),":[148],"with":[149],"varying":[150],"assumptions,":[151],"accuracy,":[152],"speed.":[154],"Our":[155],"accuracy":[156],"ranges":[157],"92%":[159],"99%":[161],"levels":[166],"compared":[167],"real":[169],"hardware":[170],"while":[171],"maintaining":[172],"scalability":[174],"producing":[176],"results.":[178],"Finally,":[179],"illustrate":[181],"that":[182],"used":[186],"design":[188],"space":[189],"exploration":[190],"predicting":[193],"future":[198],"GPUs.":[199]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
