{"id":"https://openalex.org/W2326078278","doi":"https://doi.org/10.1109/hpca.2016.7446089","title":"Selective GPU caches to eliminate CPU-GPU HW cache coherence","display_name":"Selective GPU caches to eliminate CPU-GPU HW cache coherence","publication_year":2016,"publication_date":"2016-03-01","ids":{"openalex":"https://openalex.org/W2326078278","doi":"https://doi.org/10.1109/hpca.2016.7446089","mag":"2326078278"},"language":"en","primary_location":{"id":"doi:10.1109/hpca.2016.7446089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2016.7446089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034317620","display_name":"Neha Agarwal","orcid":"https://orcid.org/0000-0002-9029-4166"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Neha Agarwal","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031781240","display_name":"David Nellans","orcid":"https://orcid.org/0000-0001-5203-8367"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David Nellans","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089789557","display_name":"Eiman Ebrahimi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eiman Ebrahimi","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018949021","display_name":"Thomas F. Wenisch","orcid":"https://orcid.org/0000-0001-9560-2124"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas F. Wenisch","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043167145","display_name":"John M. Danskin","orcid":null},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John Danskin","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112796474","display_name":"Stephen W. Keckler","orcid":null},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan\u2013Ann Arbor","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["University of Michigan"],"affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5034317620"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.6674,"has_fulltext":false,"cited_by_count":54,"citation_normalized_percentile":{"value":0.99069024,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"494","last_page":"506"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8975273966789246},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.7366670966148376},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6519947648048401},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6090331077575684},{"id":"https://openalex.org/keywords/mesi-protocol","display_name":"MESI protocol","score":0.5257816314697266},{"id":"https://openalex.org/keywords/shared-memory","display_name":"Shared memory","score":0.4686189293861389},{"id":"https://openalex.org/keywords/bus-sniffing","display_name":"Bus sniffing","score":0.4644809663295746},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4327548146247864},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.34025439620018005},{"id":"https://openalex.org/keywords/cache-coloring","display_name":"Cache coloring","score":0.3322409987449646},{"id":"https://openalex.org/keywords/cache-algorithms","display_name":"Cache algorithms","score":0.28717008233070374}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8975273966789246},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.7366670966148376},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6519947648048401},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6090331077575684},{"id":"https://openalex.org/C120936851","wikidata":"https://www.wikidata.org/wiki/Q1408065","display_name":"MESI protocol","level":5,"score":0.5257816314697266},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.4686189293861389},{"id":"https://openalex.org/C51185590","wikidata":"https://www.wikidata.org/wiki/Q1017228","display_name":"Bus sniffing","level":5,"score":0.4644809663295746},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4327548146247864},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.34025439620018005},{"id":"https://openalex.org/C201148951","wikidata":"https://www.wikidata.org/wiki/Q5015976","display_name":"Cache coloring","level":4,"score":0.3322409987449646},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.28717008233070374}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca.2016.7446089","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca.2016.7446089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W127059985","https://openalex.org/W1580997674","https://openalex.org/W1967373117","https://openalex.org/W1979527452","https://openalex.org/W1980951609","https://openalex.org/W1994999558","https://openalex.org/W1997319712","https://openalex.org/W1997352364","https://openalex.org/W1997576530","https://openalex.org/W2010802738","https://openalex.org/W2014555481","https://openalex.org/W2016521571","https://openalex.org/W2028918287","https://openalex.org/W2031043969","https://openalex.org/W2053776346","https://openalex.org/W2062430565","https://openalex.org/W2066691060","https://openalex.org/W2074812550","https://openalex.org/W2075096358","https://openalex.org/W2080592089","https://openalex.org/W2081644593","https://openalex.org/W2082000934","https://openalex.org/W2083780331","https://openalex.org/W2083963764","https://openalex.org/W2092212481","https://openalex.org/W2095894588","https://openalex.org/W2098737795","https://openalex.org/W2100926301","https://openalex.org/W2102843684","https://openalex.org/W2106193445","https://openalex.org/W2110683663","https://openalex.org/W2114667497","https://openalex.org/W2114827297","https://openalex.org/W2116751788","https://openalex.org/W2118383989","https://openalex.org/W2119027087","https://openalex.org/W2123845384","https://openalex.org/W2126877041","https://openalex.org/W2131655821","https://openalex.org/W2132773256","https://openalex.org/W2133156997","https://openalex.org/W2137196255","https://openalex.org/W2143443573","https://openalex.org/W2148032086","https://openalex.org/W2148103955","https://openalex.org/W2150618958","https://openalex.org/W2151032144","https://openalex.org/W2151233837","https://openalex.org/W2151460056","https://openalex.org/W2155063683","https://openalex.org/W2156094106","https://openalex.org/W2157802978","https://openalex.org/W2157860533","https://openalex.org/W2158190559","https://openalex.org/W2158551169","https://openalex.org/W2162192855","https://openalex.org/W2165039583","https://openalex.org/W2166940427","https://openalex.org/W2169387321","https://openalex.org/W2236227338","https://openalex.org/W2273440736","https://openalex.org/W2912601938","https://openalex.org/W3148387930","https://openalex.org/W4232664043","https://openalex.org/W4233675595","https://openalex.org/W4234436083","https://openalex.org/W4238182192","https://openalex.org/W4241790656","https://openalex.org/W4249165316","https://openalex.org/W4250205214","https://openalex.org/W4253679649","https://openalex.org/W4256629673","https://openalex.org/W6653887427","https://openalex.org/W6678990771","https://openalex.org/W6682210026","https://openalex.org/W6694513646"],"related_works":["https://openalex.org/W2290195868","https://openalex.org/W4304166325","https://openalex.org/W2057019356","https://openalex.org/W2290179447","https://openalex.org/W1482370651","https://openalex.org/W2407815036","https://openalex.org/W2079555365","https://openalex.org/W4255008187","https://openalex.org/W2135302104","https://openalex.org/W2075222997"],"abstract_inverted_index":{"Cache":[0],"coherence":[1,25,48,84,100,194],"is":[2,49],"ubiquitous":[3],"in":[4,52],"shared":[5],"memory":[6,15,80,140],"multiprocessors":[7],"because":[8],"it":[9],"provides":[10],"a":[11,65,125,169,178,191],"simple,":[12],"high":[13],"performance":[14,110],"abstraction":[16],"to":[17,30,86,107,129,146,163,174,185],"programmers.":[18],"Recent":[19],"work":[20],"suggests":[21],"extending":[22],"hardware":[23,46,160,179,193],"cache":[24,47,161],"between":[26,39,88],"CPUs":[27,56,187],"and":[28,41,57,91,124,188],"GPUs":[29,58,189],"help":[31],"support":[32,130],"programming":[33],"models":[34],"with":[35,54],"tightly":[36],"coordinated":[37],"sharing":[38],"CPU":[40,90,99],"GPU":[42,76,96,135,149,172],"threads.":[43],"However,":[44],"implementing":[45],"particularly":[50],"challenging":[51],"systems":[53],"discrete":[55],"that":[59,81],"may":[60],"not":[61],"be":[62],"produced":[63],"by":[64],"single":[66,192],"vendor.":[67],"Instead,":[68],"we":[69,74,142],"propose,":[70],"selective":[71,113,170],"caching,":[72],"wherein":[73],"disallow":[75],"caching":[77,120,150,171],"of":[78,112,151,177],"any":[79],"would":[82],"require":[83],"updates":[85],"propagate":[87],"the":[89,95,109,183],"GPU,":[92],"thereby":[93],"decoupling":[94],"from":[97],"vendor-specific":[98],"protocols.":[101],"We":[102],"propose":[103],"several":[104],"architectural":[105],"improvements":[106],"offset":[108],"penalty":[111],"caching:":[114],"aggressive":[115],"request":[116],"coalescing,":[117],"CPU-side":[118],"coherent":[119],"for":[121],"GPU-uncacheable":[122],"requests,":[123],"CPU-GPU":[126],"interconnect":[127],"optimization":[128],"variable-size":[131],"transfers.":[132],"Moreover,":[133],"current":[134],"workloads":[136],"access":[137],"many":[138],"read-only":[139],"pages;":[141],"exploit":[143],"this":[144],"property":[145],"allow":[147],"promiscuous":[148],"these":[152],"pages,":[153],"relying":[154],"on":[155],"page-level":[156],"protection,":[157],"rather":[158],"than":[159],"coherence,":[162],"ensure":[164],"correctness.":[165],"These":[166],"optimizations":[167],"bring":[168],"implementation":[173,181],"within":[175],"93%":[176],"cache-coherent":[180],"without":[182],"need":[184],"integrate":[186],"under":[190],"protocol.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":8},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":18},{"year":2016,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
