{"id":"https://openalex.org/W4226062505","doi":"https://doi.org/10.1007/s10766-022-00729-2","title":"A Quantitative Study of Locality in GPU Caches for Memory-Divergent Workloads","display_name":"A Quantitative Study of Locality in GPU Caches for Memory-Divergent Workloads","publication_year":2022,"publication_date":"2022-04-01","ids":{"openalex":"https://openalex.org/W4226062505","doi":"https://doi.org/10.1007/s10766-022-00729-2"},"language":"en","primary_location":{"id":"doi:10.1007/s10766-022-00729-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10766-022-00729-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10766-022-00729-2.pdf","source":{"id":"https://openalex.org/S148521650","display_name":"International Journal of Parallel Programming","issn_l":"0885-7458","issn":["0885-7458","1573-7640"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Parallel Programming","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10766-022-00729-2.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083072467","display_name":"Sohan Lal","orcid":"https://orcid.org/0000-0002-2325-1705"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]},{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]},{"id":"https://openalex.org/I884043246","display_name":"Hamburg University of Technology","ror":"https://ror.org/04bs1pb34","country_code":"DE","type":"education","lineage":["https://openalex.org/I884043246"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Sohan Lal","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany","Technische Universit\u00e4t Hamburg, Hamburg, Germany"],"raw_orcid":"https://orcid.org/0000-0002-2325-1705","affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]},{"raw_affiliation_string":"Technische Universit\u00e4t Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309","https://openalex.org/I884043246"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114577218","display_name":"Bogaraju Sharatchandra Varma","orcid":null},"institutions":[{"id":"https://openalex.org/I138801177","display_name":"University of Ulster","ror":"https://ror.org/01yp9g959","country_code":"GB","type":"education","lineage":["https://openalex.org/I138801177"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Bogaraju Sharatchandra Varma","raw_affiliation_strings":["Ulster University, Jordanstown, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ulster University, Jordanstown, UK","institution_ids":["https://openalex.org/I138801177"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112321355","display_name":"Ben Juurlink","orcid":null},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ben Juurlink","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5083072467"],"corresponding_institution_ids":["https://openalex.org/I159176309","https://openalex.org/I4577782","https://openalex.org/I884043246"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":1.8652,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.83928251,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":"50","issue":"2","first_page":"189","last_page":"216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8803503513336182},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.8127037286758423},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.7733581066131592},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7440217137336731},{"id":"https://openalex.org/keywords/thrashing","display_name":"Thrashing","score":0.7100168466567993},{"id":"https://openalex.org/keywords/locality-of-reference","display_name":"Locality of reference","score":0.6660510301589966},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.6354632377624512},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.6048949956893921},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5945268869400024},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5543262958526611},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.5470741987228394},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.48786449432373047},{"id":"https://openalex.org/keywords/page-cache","display_name":"Page cache","score":0.4852883517742157},{"id":"https://openalex.org/keywords/cache-invalidation","display_name":"Cache invalidation","score":0.424783855676651},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.19779878854751587}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8803503513336182},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.8127037286758423},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.7733581066131592},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7440217137336731},{"id":"https://openalex.org/C28362024","wikidata":"https://www.wikidata.org/wiki/Q2067413","display_name":"Thrashing","level":2,"score":0.7100168466567993},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.6660510301589966},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.6354632377624512},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.6048949956893921},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5945268869400024},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5543262958526611},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.5470741987228394},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.48786449432373047},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.4852883517742157},{"id":"https://openalex.org/C25536678","wikidata":"https://www.wikidata.org/wiki/Q5015977","display_name":"Cache invalidation","level":5,"score":0.424783855676651},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.19779878854751587},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/s10766-022-00729-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10766-022-00729-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10766-022-00729-2.pdf","source":{"id":"https://openalex.org/S148521650","display_name":"International Journal of Parallel Programming","issn_l":"0885-7458","issn":["0885-7458","1573-7640"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Parallel Programming","raw_type":"journal-article"},{"id":"pmh:oai:null:11420/12244","is_oa":true,"landing_page_url":"http://hdl.handle.net/11420/12244","pdf_url":null,"source":{"id":"https://openalex.org/S4306401751","display_name":"tub.dok (Hamburg University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I884043246","host_organization_name":"Hamburg University of Technology","host_organization_lineage":["https://openalex.org/I884043246"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:pure.atira.dk:openaire/e0aec7c5-ee5d-4f21-89f5-1e4818dcaae4","is_oa":true,"landing_page_url":"https://pure.ulster.ac.uk/en/publications/e0aec7c5-ee5d-4f21-89f5-1e4818dcaae4","pdf_url":null,"source":{"id":"https://openalex.org/S4306402454","display_name":"Ulster University Research Portal (Ulster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138801177","host_organization_name":"University of Ulster","host_organization_lineage":["https://openalex.org/I138801177"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Lal, S, Sharat Chandra Varma, B & Juurlink, B 2022, 'A Quantitative Study of Locality in GPU Caches for Memory-Divergent Workloads', International Journal of Parallel Programming, vol. 50, pp. 189-216. https://doi.org/10.1007/s10766-022-00729-2","raw_type":"article"},{"id":"pmh:oai:pure.atira.dk:publications/e0aec7c5-ee5d-4f21-89f5-1e4818dcaae4","is_oa":true,"landing_page_url":"http://www.scopus.com/inward/record.url?scp=85127581640&partnerID=8YFLogxK","pdf_url":null,"source":{"id":"https://openalex.org/S4306402454","display_name":"Ulster University Research Portal (Ulster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I138801177","host_organization_name":"University of Ulster","host_organization_lineage":["https://openalex.org/I138801177"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Lal, S, Sharat Chandra Varma, B & Juurlink, B 2022, 'A Quantitative Study of Locality in GPU Caches for Memory-Divergent Workloads', International Journal of Parallel Programming, vol. 50, pp. 189-216. https://doi.org/10.1007/s10766-022-00729-2","raw_type":"article"},{"id":"doi:10.15480/882.4333","is_oa":true,"landing_page_url":"https://doi.org/10.15480/882.4333","pdf_url":null,"source":{"id":"https://openalex.org/S7407052987","display_name":"TUHH Open Research","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"doi:10.1007/s10766-022-00729-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10766-022-00729-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10766-022-00729-2.pdf","source":{"id":"https://openalex.org/S148521650","display_name":"International Journal of Parallel Programming","issn_l":"0885-7458","issn":["0885-7458","1573-7640"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Parallel Programming","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4226062505.pdf","grobid_xml":"https://content.openalex.org/works/W4226062505.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1825921030","https://openalex.org/W1979527452","https://openalex.org/W1982996921","https://openalex.org/W1989061323","https://openalex.org/W1990343439","https://openalex.org/W2008185810","https://openalex.org/W2020572638","https://openalex.org/W2027806965","https://openalex.org/W2047060659","https://openalex.org/W2054782014","https://openalex.org/W2055497547","https://openalex.org/W2062527253","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2091599698","https://openalex.org/W2128329055","https://openalex.org/W2144264070","https://openalex.org/W2145866640","https://openalex.org/W2156180003","https://openalex.org/W2156831150","https://openalex.org/W2171474230","https://openalex.org/W2277429057","https://openalex.org/W2605251767","https://openalex.org/W2883882491","https://openalex.org/W2905658624","https://openalex.org/W2980229124","https://openalex.org/W3010432610","https://openalex.org/W3035591131","https://openalex.org/W3104094521"],"related_works":["https://openalex.org/W2535115842","https://openalex.org/W2116323004","https://openalex.org/W2048814516","https://openalex.org/W4252128743","https://openalex.org/W1970102182","https://openalex.org/W2734782074","https://openalex.org/W2089633376","https://openalex.org/W2750716983","https://openalex.org/W2139299407","https://openalex.org/W2029311465"],"abstract_inverted_index":{"Abstract":[0],"GPUs":[1,97],"are":[2,142,174,192],"capable":[3],"of":[4,71,74,130,157,169,199],"delivering":[5],"peak":[6,11],"performance":[7,12,21,28],"in":[8,64,96,104],"TFLOPs,":[9],"however,":[10],"is":[13,25,51,68,110],"often":[14],"difficult":[15],"to":[16,19,34,60,106,145,164,194],"achieve":[17],"due":[18,144],"several":[20,58],"bottlenecks.":[22],"Memory":[23],"divergence":[24],"one":[26],"such":[27,138],"bottleneck":[29],"that":[30,127,173],"makes":[31],"it":[32],"harder":[33],"exploit":[35,61],"locality,":[36,76],"cause":[37],"cache":[38,120,132,158,171,202,214],"thrashing,":[39],"and":[40,93,134],"high":[41],"miss":[42],"rate,":[43],"therefore,":[44],"impeding":[45],"GPU":[46],"performance.":[47],"As":[48],"data":[49,62,75,91,119,146],"locality":[50,63,92,115,186,207],"crucial":[52],"for":[53,82,121,187,210],"performance,":[54],"there":[55,67,109],"have":[56],"been":[57],"efforts":[59],"GPUs.":[65],"However,":[66],"a":[69,111,170,177],"lack":[70],"quantitative":[72],"analysis":[73],"which":[77,188],"could":[78],"pave":[79],"the":[80,90,117,131,153,161,183,196,200,213],"way":[81],"optimizations.":[83],"In":[84],"this":[85],"paper,":[86],"we":[87],"quantitatively":[88],"study":[89,181],"its":[94],"limits":[95],"at":[98,116],"different":[99],"granularities.":[100],"We":[101,124],"show":[102,126],"that,":[103],"contrast":[105],"previous":[107],"studies,":[108],"significantly":[112],"higher":[113],"inter-warp":[114],"L1":[118],"memory-divergent":[122],"workloads.":[123],"further":[125,211],"about":[128],"50%":[129],"capacity":[133],"other":[135,197],"scarce":[136],"resources":[137],"as":[139],"NoC":[140],"bandwidth":[141],"wasted":[143],"over-fetch":[147],"caused":[148],"by":[149],"memory":[150,190],"divergence.":[151],"While":[152],"low":[154],"spatial":[155,185,206],"utilization":[156],"lines":[159],"justifies":[160],"sectored-cache":[162],"design":[163],"only":[165],"fetch":[166,195],"those":[167],"sectors":[168,198],"line":[172],"needed":[175,193],"during":[176],"request,":[178],"our":[179],"limit":[180],"reveals":[182],"lost":[184,205],"additional":[189],"requests":[191],"same":[201],"line.":[203],"The":[204],"presents":[208],"opportunities":[209],"optimizing":[212],"design.":[215]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
