{"id":"https://openalex.org/W2950969115","doi":"https://doi.org/10.1145/3322127","title":"An Efficient GPU Cache Architecture for Applications with Irregular Memory Access Patterns","display_name":"An Efficient GPU Cache Architecture for Applications with Irregular Memory Access Patterns","publication_year":2019,"publication_date":"2019-06-17","ids":{"openalex":"https://openalex.org/W2950969115","doi":"https://doi.org/10.1145/3322127","mag":"2950969115"},"language":"en","primary_location":{"id":"doi:10.1145/3322127","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3322127","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3322127","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3322127","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070637189","display_name":"Bingchao Li","orcid":"https://orcid.org/0000-0002-8629-6265"},"institutions":[{"id":"https://openalex.org/I28813325","display_name":"Civil Aviation University of China","ror":"https://ror.org/03je71k37","country_code":"CN","type":"education","lineage":["https://openalex.org/I28813325"]},{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bingchao Li","raw_affiliation_strings":["Civil Aviation University of China 8 Tianjin University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Civil Aviation University of China 8 Tianjin University, China","institution_ids":["https://openalex.org/I28813325","https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102122678","display_name":"Jizeng Wei","orcid":"https://orcid.org/0000-0002-3040-6859"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jizeng Wei","raw_affiliation_strings":["Tianjin University, Tianjin, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100882285","display_name":"Jizhou Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jizhou Sun","raw_affiliation_strings":["Tianjin University, Tianjin, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018033573","display_name":"Murali Annavaram","orcid":"https://orcid.org/0000-0002-4633-6867"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Murali Annavaram","raw_affiliation_strings":["University of Southern California, Los Angeles, California, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, California, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037648751","display_name":"Nam Sung Kim","orcid":"https://orcid.org/0000-0002-0442-5634"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nam Sung Kim","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, Illinois, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, Illinois, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070637189"],"corresponding_institution_ids":["https://openalex.org/I162868743","https://openalex.org/I28813325"],"apc_list":null,"apc_paid":null,"fwci":1.7276,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.83692074,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"16","issue":"3","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8936132192611694},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.7401255369186401},{"id":"https://openalex.org/keywords/cache-pollution","display_name":"Cache pollution","score":0.6992501616477966},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6729108691215515},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6706409454345703},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.6011836528778076},{"id":"https://openalex.org/keywords/page-cache","display_name":"Page cache","score":0.5578276515007019},{"id":"https://openalex.org/keywords/cache-only-memory-architecture","display_name":"Cache-only memory architecture","score":0.5109145045280457},{"id":"https://openalex.org/keywords/cache-invalidation","display_name":"Cache invalidation","score":0.4911622703075409},{"id":"https://openalex.org/keywords/non-uniform-memory-access","display_name":"Non-uniform memory access","score":0.487064391374588},{"id":"https://openalex.org/keywords/uniform-memory-access","display_name":"Uniform memory access","score":0.45477718114852905},{"id":"https://openalex.org/keywords/smart-cache","display_name":"Smart Cache","score":0.4494275748729706},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.3972817063331604},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.37211471796035767},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.315044105052948},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.18818581104278564}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8936132192611694},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.7401255369186401},{"id":"https://openalex.org/C113166858","wikidata":"https://www.wikidata.org/wiki/Q5015981","display_name":"Cache pollution","level":5,"score":0.6992501616477966},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6729108691215515},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6706409454345703},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.6011836528778076},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.5578276515007019},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.5109145045280457},{"id":"https://openalex.org/C25536678","wikidata":"https://www.wikidata.org/wiki/Q5015977","display_name":"Cache invalidation","level":5,"score":0.4911622703075409},{"id":"https://openalex.org/C133371097","wikidata":"https://www.wikidata.org/wiki/Q868014","display_name":"Non-uniform memory access","level":5,"score":0.487064391374588},{"id":"https://openalex.org/C51290061","wikidata":"https://www.wikidata.org/wiki/Q1936765","display_name":"Uniform memory access","level":4,"score":0.45477718114852905},{"id":"https://openalex.org/C167713795","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"Smart Cache","level":5,"score":0.4494275748729706},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.3972817063331604},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.37211471796035767},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.315044105052948},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.18818581104278564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3322127","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3322127","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3322127","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3322127","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3322127","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3322127","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.6200000047683716}],"awards":[{"id":"https://openalex.org/G2215210296","display_name":null,"funder_award_id":"61702521","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4307412102","display_name":null,"funder_award_id":"18JCQNJC00400","funder_id":"https://openalex.org/F4320323993","funder_display_name":"Natural Science Foundation of Tianjin City"},{"id":"https://openalex.org/G5986341906","display_name":null,"funder_award_id":"CNS 17-05047","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6541138388","display_name":null,"funder_award_id":"3122018C021","funder_id":"https://openalex.org/F4320321565","funder_display_name":"Civil Aviation University of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321565","display_name":"Civil Aviation University of China","ror":"https://ror.org/03je71k37"},{"id":"https://openalex.org/F4320323993","display_name":"Natural Science Foundation of Tianjin City","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2950969115.pdf","grobid_xml":"https://content.openalex.org/works/W2950969115.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1845650582","https://openalex.org/W1979527452","https://openalex.org/W1980136882","https://openalex.org/W1982996921","https://openalex.org/W1989061323","https://openalex.org/W1997162567","https://openalex.org/W1997352364","https://openalex.org/W2008115889","https://openalex.org/W2018658595","https://openalex.org/W2021211271","https://openalex.org/W2027806965","https://openalex.org/W2038932419","https://openalex.org/W2047060659","https://openalex.org/W2062527253","https://openalex.org/W2067441262","https://openalex.org/W2072768743","https://openalex.org/W2080958936","https://openalex.org/W2083839254","https://openalex.org/W2084110734","https://openalex.org/W2091583059","https://openalex.org/W2092406519","https://openalex.org/W2093043622","https://openalex.org/W2098505406","https://openalex.org/W2103760115","https://openalex.org/W2128120785","https://openalex.org/W2129817042","https://openalex.org/W2130749431","https://openalex.org/W2155503253","https://openalex.org/W2156831150","https://openalex.org/W2236252626","https://openalex.org/W2238992335","https://openalex.org/W2464177207","https://openalex.org/W2517837804","https://openalex.org/W2564688911","https://openalex.org/W2897830517"],"related_works":["https://openalex.org/W1598433531","https://openalex.org/W1974211070","https://openalex.org/W126098351","https://openalex.org/W2623065547","https://openalex.org/W2115782696","https://openalex.org/W3145643808","https://openalex.org/W2156586505","https://openalex.org/W3111801817","https://openalex.org/W3172228199","https://openalex.org/W2363750085"],"abstract_inverted_index":{"GPUs":[0,98],"provide":[1],"high-bandwidth/low-latency":[2],"on-chip":[3],"shared":[4,95,194],"memory":[5,17,21,25,71,82,96,136,155,169,195,225,236,258,271,285],"and":[6,81,88,123,134,238,277],"L1":[7,38,46,59,92,125,142,176,188,211],"cache":[8,39,47,60,126,143,161,177,189,212],"to":[9,37,79,139,157,222],"efficiently":[10,65,119],"service":[11],"a":[12,158],"large":[13,35],"number":[14],"of":[15,45,54,97,175,210,235,242,254,267,281],"concurrent":[16,20],"requests.":[18],"Specifically,":[19,145],"requests":[22,72,86,226],"accessing":[23],"contiguous":[24],"space":[26,156,196],"are":[27],"coalesced":[28],"into":[29],"warp-wide":[30,55],"accesses.":[31,56],"To":[32,205],"support":[33,120],"such":[34,58],"accesses":[36],"with":[40,73,131,213,256,269,283],"low":[41],"latency,":[42],"the":[43,94,141,173,193,207,232,240,251,265,279],"size":[44],"line":[48,127,190],"is":[49,99,198],"no":[50],"smaller":[51],"than":[52],"that":[53,84,197,248],"However,":[57],"architecture":[61],"cannot":[62],"always":[63],"be":[64],"utilized":[66],"when":[67],"applications":[68,130,255,268,282],"generate":[69],"many":[70,104,203],"irregular":[74,135,257,284],"access":[75,137,259,272,286],"patterns":[76,138,260,287],"especially":[77],"due":[78],"branch":[80],"divergences":[83],"make":[85],"uncoalesced":[87],"small.":[89],"Furthermore,":[90,163],"unlike":[91],"cache,":[93],"not":[100,199],"often":[101],"used":[102,201],"in":[103,153,192,202,227],"applications,":[105],"which":[106,117,229],"essentially":[107],"depends":[108],"on":[109],"programmers.":[110],"In":[111],"this":[112],"article,":[113],"we":[114,218],"propose":[115,220],"Elastic-Cache,":[116],"can":[118,147,230],"both":[121,132],"fine-":[122],"coarse-grained":[124],"management":[128],"for":[129,178,186,215],"regular":[133,270],"improve":[140,206,239],"efficiency.":[144],"it":[146,164,182],"store":[148],"32-":[149],"or":[150],"64-byte":[151],"words":[152],"non-contiguous":[154],"single":[159],"128-byte":[160],"line.":[162],"neither":[165],"requires":[166],"an":[167],"extra":[168],"structure":[170],"nor":[171],"reduces":[172],"capacity":[174],"tag":[179],"storage,":[180],"since":[181],"stores":[183],"auxiliary":[184],"tags":[185],"fine-grained":[187,216],"managements":[191],"fully":[200],"applications.":[204],"bandwidth":[208],"utilization":[209],"Elastic-Cache":[214,249,276],"accesses,":[217],"further":[219],"Elastic-Plus":[221,274],"issue":[223],"32-byte":[224],"parallel,":[228],"reduce":[231],"processing":[233],"latency":[234],"instructions":[237],"throughput":[241],"GPUs.":[243],"Our":[244],"experiment":[245],"result":[246],"shows":[247],"improves":[250,278],"geometric-mean":[252],"performance":[253,266,280],"by":[261,288],"104%":[262],"without":[263],"degrading":[264],"patterns.":[273],"outperforms":[275],"131%.":[289]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-05-13T08:25:38.343686","created_date":"2025-10-10T00:00:00"}
