{"id":"https://openalex.org/W2529649238","doi":"https://doi.org/10.1109/iiswc.2016.7581287","title":"Characterizing memory bottlenecks in GPGPU workloads","display_name":"Characterizing memory bottlenecks in GPGPU workloads","publication_year":2016,"publication_date":"2016-09-01","ids":{"openalex":"https://openalex.org/W2529649238","doi":"https://doi.org/10.1109/iiswc.2016.7581287","mag":"2529649238"},"language":"en","primary_location":{"id":"doi:10.1109/iiswc.2016.7581287","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iiswc.2016.7581287","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Symposium on Workload Characterization (IISWC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.research.ed.ac.uk/en/publications/3c6e2f34-500c-4463-af50-78541b09336f","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031450756","display_name":"Saumay Dublish","orcid":null},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Saumay Dublish","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056071760","display_name":"Vijay Nagarajan","orcid":"https://orcid.org/0009-0000-5045-4754"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Vijay Nagarajan","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027477866","display_name":"Nigel Topham","orcid":"https://orcid.org/0000-0002-6310-0602"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Nigel Topham","raw_affiliation_strings":["University of Edinburgh"],"affiliations":[{"raw_affiliation_string":"University of Edinburgh","institution_ids":["https://openalex.org/I98677209"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5031450756"],"corresponding_institution_ids":["https://openalex.org/I98677209"],"apc_list":null,"apc_paid":null,"fwci":2.2073,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.87455332,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8266319036483765},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8257226347923279},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.688989520072937},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6710163950920105},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.6534885168075562},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.616491973400116},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6011106967926025},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.476445734500885},{"id":"https://openalex.org/keywords/non-uniform-memory-access","display_name":"Non-uniform memory access","score":0.4557052552700043},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.44110703468322754},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.42355605959892273},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3970063328742981},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.3909328579902649},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3712407946586609},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.34166401624679565},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.25800254940986633},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2030976116657257},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.15584927797317505}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8266319036483765},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8257226347923279},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.688989520072937},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6710163950920105},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.6534885168075562},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.616491973400116},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6011106967926025},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.476445734500885},{"id":"https://openalex.org/C133371097","wikidata":"https://www.wikidata.org/wiki/Q868014","display_name":"Non-uniform memory access","level":5,"score":0.4557052552700043},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.44110703468322754},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.42355605959892273},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3970063328742981},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.3909328579902649},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3712407946586609},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.34166401624679565},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25800254940986633},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2030976116657257},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.15584927797317505},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/iiswc.2016.7581287","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iiswc.2016.7581287","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Symposium on Workload Characterization (IISWC)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.ed.ac.uk:openaire/3c6e2f34-500c-4463-af50-78541b09336f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/3c6e2f34-500c-4463-af50-78541b09336f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Dublish, S, Nagarajan, V & Topham, N 2016, Characterizing memory bottlenecks in GPGPU workloads. in 2016 IEEE International Symposium on Workload Characterization (IISWC). Institute of Electrical and Electronics Engineers, Providence, RI, USA, pp. 1-2, 2016 IEEE International Symposium on Workload Characterization, Providence, Rhode Island, United States, 25/09/16. https://doi.org/10.1109/IISWC.2016.7581287","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.ed.ac.uk:publications/3c6e2f34-500c-4463-af50-78541b09336f","is_oa":false,"landing_page_url":"http://hdl.handle.net/20.500.11820/3c6e2f34-500c-4463-af50-78541b09336f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"pmh:oai:pure.ed.ac.uk:openaire/3c6e2f34-500c-4463-af50-78541b09336f","is_oa":true,"landing_page_url":"https://www.research.ed.ac.uk/en/publications/3c6e2f34-500c-4463-af50-78541b09336f","pdf_url":null,"source":{"id":"https://openalex.org/S4406922455","display_name":"Edinburgh Research Explorer","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Dublish, S, Nagarajan, V & Topham, N 2016, Characterizing memory bottlenecks in GPGPU workloads. in 2016 IEEE International Symposium on Workload Characterization (IISWC). Institute of Electrical and Electronics Engineers, Providence, RI, USA, pp. 1-2, 2016 IEEE International Symposium on Workload Characterization, Providence, Rhode Island, United States, 25/09/16. https://doi.org/10.1109/IISWC.2016.7581287","raw_type":"contributionToPeriodical"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2053744175","https://openalex.org/W2147002477","https://openalex.org/W2315868086"],"related_works":["https://openalex.org/W2807409404","https://openalex.org/W2606565524","https://openalex.org/W4313496011","https://openalex.org/W2350803493","https://openalex.org/W2735130281","https://openalex.org/W3111801817","https://openalex.org/W2570594754","https://openalex.org/W1531802798","https://openalex.org/W3172228199","https://openalex.org/W79990711"],"abstract_inverted_index":{"GPUs":[0,57],"are":[1],"often":[2],"limited":[3],"by":[4,74,88],"the":[5,10,24,28,32,53,60,69,76,80,85,107,132,140],"off-chip":[6,29,100],"memory":[7,46,61,90,141],"bandwidth.":[8],"With":[9],"advent":[11],"of":[12,45,139],"general-purpose":[13,64],"computing":[14],"on":[15],"GPUs,":[16],"cache":[17,33,81,95],"hierarchy":[18,34,62,82,96],"has":[19],"been":[20],"introduced":[21],"to":[22,27,130],"filter":[23],"bandwidth":[25,38,54,77,99,108,133],"demand":[26],"memory.":[30,101],"However,":[31],"presents":[35],"its":[36],"own":[37],"limitations":[39],"in":[40,56,71,79,110],"sustaining":[41],"such":[42],"high":[43,98],"levels":[44,114,138],"traffic.":[47],"In":[48],"this":[49],"work,":[50],"we":[51,124],"characterize":[52],"bottleneck":[55,78,109,134],"present":[58],"across":[59,136],"for":[63],"applications.":[65],"We":[66,102],"show":[67,104,125],"that":[68,105,126],"improvement":[70],"performance":[72],"achieved":[73],"mitigating":[75],"can":[83,115,119],"exceed":[84],"speedup":[86],"obtained":[87],"a":[89,93],"system":[91],"with":[92],"baseline":[94],"and":[97,118],"also":[103],"addressing":[106],"isolation":[111],"at":[112],"specific":[113],"be":[116,121],"sub-optimal":[117],"even":[120],"counter-productive.":[122],"Therefore,":[123],"it":[127],"is":[128],"imperative":[129],"resolve":[131],"synergistically":[135],"different":[137],"hierarchy.":[142]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
