{"id":"https://openalex.org/W4250422783","doi":"https://doi.org/10.1109/micro.2016.7783729","title":"Lazy release consistency for GPUs","display_name":"Lazy release consistency for GPUs","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W4250422783","doi":"https://doi.org/10.1109/micro.2016.7783729"},"language":"en","primary_location":{"id":"doi:10.1109/micro.2016.7783729","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783729","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075312447","display_name":"Johnathan Alsop","orcid":"https://orcid.org/0000-0001-5272-2396"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Johnathan Alsop","raw_affiliation_strings":["University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003216606","display_name":"Marc S. Orr","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marc S. Orr","raw_affiliation_strings":["AMD Research","University of Wisconsin-Madison"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]},{"raw_affiliation_string":"University of Wisconsin-Madison","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077056823","display_name":"Bradford M. Beckmann","orcid":"https://orcid.org/0000-0002-5444-6521"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bradford M. Beckmann","raw_affiliation_strings":["AMD Research"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075888890","display_name":"David A. Wood","orcid":"https://orcid.org/0000-0002-9748-8561"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David A. Wood","raw_affiliation_strings":["AMD Research","University of Wisconsin-Madison"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]},{"raw_affiliation_string":"University of Wisconsin-Madison","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5075312447"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":3.2133,"has_fulltext":false,"cited_by_count":31,"citation_normalized_percentile":{"value":0.92277246,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9071251153945923},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.711922824382782},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.6965837478637695},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5968776941299438},{"id":"https://openalex.org/keywords/sequential-consistency","display_name":"Sequential consistency","score":0.5873600244522095},{"id":"https://openalex.org/keywords/cache-coherence","display_name":"Cache coherence","score":0.5779848694801331},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.534771740436554},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5252018570899963},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.5162642002105713},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.4694212079048157},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4425663948059082},{"id":"https://openalex.org/keywords/consistency-model","display_name":"Consistency model","score":0.4082440733909607},{"id":"https://openalex.org/keywords/data-consistency","display_name":"Data consistency","score":0.24955332279205322},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19221144914627075},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1503528356552124},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.13539767265319824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12890493869781494},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.11480474472045898},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.09011542797088623}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9071251153945923},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.711922824382782},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.6965837478637695},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5968776941299438},{"id":"https://openalex.org/C82029504","wikidata":"https://www.wikidata.org/wiki/Q4373882","display_name":"Sequential consistency","level":4,"score":0.5873600244522095},{"id":"https://openalex.org/C141917322","wikidata":"https://www.wikidata.org/wiki/Q1025017","display_name":"Cache coherence","level":5,"score":0.5779848694801331},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.534771740436554},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5252018570899963},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.5162642002105713},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.4694212079048157},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4425663948059082},{"id":"https://openalex.org/C37279795","wikidata":"https://www.wikidata.org/wiki/Q2492305","display_name":"Consistency model","level":3,"score":0.4082440733909607},{"id":"https://openalex.org/C93361087","wikidata":"https://www.wikidata.org/wiki/Q4426698","display_name":"Data consistency","level":2,"score":0.24955332279205322},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19221144914627075},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1503528356552124},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.13539767265319824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12890493869781494},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.11480474472045898},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.09011542797088623},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/micro.2016.7783729","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783729","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320307757","display_name":"Advanced Micro Devices","ror":"https://ror.org/04kd6c783"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1994999558","https://openalex.org/W1997352364","https://openalex.org/W2021211271","https://openalex.org/W2053776346","https://openalex.org/W2081698031","https://openalex.org/W2083780331","https://openalex.org/W2094191591","https://openalex.org/W2122901568","https://openalex.org/W2148032086","https://openalex.org/W2150618958","https://openalex.org/W2152885346","https://openalex.org/W2153636750","https://openalex.org/W2155063683","https://openalex.org/W2164391801","https://openalex.org/W2170293694","https://openalex.org/W2234392215","https://openalex.org/W2236227338","https://openalex.org/W2237082146","https://openalex.org/W2301631282","https://openalex.org/W2470243357","https://openalex.org/W3148387930","https://openalex.org/W4229517165","https://openalex.org/W4231769592","https://openalex.org/W6689574687"],"related_works":["https://openalex.org/W2036306661","https://openalex.org/W2044887272","https://openalex.org/W246909719","https://openalex.org/W118795575","https://openalex.org/W2138711299","https://openalex.org/W1906213980","https://openalex.org/W4254064645","https://openalex.org/W1979030370","https://openalex.org/W4249870896","https://openalex.org/W2171564459"],"abstract_inverted_index":{"The":[0,45,81],"heterogeneous-race-free":[1],"(HRF)":[2],"memory":[3,39,55,113,171,254],"model":[4,114,255],"has":[5,42,121,159],"been":[6,122,160],"embraced":[7],"by":[8],"the":[9,26,34,58,105,112,129,249,287],"Heterogeneous":[10],"System":[11],"Architecture":[12],"(HSA)":[13],"Foundation":[14],"and":[15,23,79,115,155],"OpenCL":[16],"<sup":[17],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[18],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">TM</sup>":[19],"because":[20],"it":[21,76,183],"clearly":[22],"precisely":[24],"defines":[25],"behavior":[27],"of":[28,61,119,138,190,229,277],"current":[29],"GPUs.":[30],"However,":[31,108],"compared":[32,145],"to":[33,52,103,140,146,162,187,200,214,226,247],"simpler":[35,250],"SC":[36,168,251],"for":[37,169,209,252,259],"DRF":[38,170,253],"model,":[40,172],"HRF":[41,49,86],"two":[43],"shortcomings.":[44],"first":[46],"is":[47,73,77,84],"that":[48,85,128,151,266],"requires":[50],"programmers":[51,246],"label":[53],"atomic":[54],"operations":[56],"with":[57,166],"correct":[59],"scope":[60],"synchronization.":[62],"This":[63],"explicit":[64],"labeling":[65],"can":[66],"save":[67],"significant":[68],"coherence":[69,234],"overhead":[70],"when":[71,237],"synchronization":[72,165,230,239],"local,":[74],"but":[75,182],"tedious":[78],"error-prone.":[80],"second":[82,106],"shortcoming":[83],"restricts":[87],"important":[88],"dynamic":[89],"data":[90],"sharing":[91],"patterns":[92],"like":[93],"work":[94,97,153],"stealing.":[95],"Prior":[96],"on":[98,142,174,284],"remote-scope":[99],"promotion":[100],"(RSP)":[101],"attempted":[102],"resolve":[104,195],"shortcoming.":[107],"RSP":[109,120,132],"further":[110],"complicates":[111],"no":[116],"scalable":[117],"implementation":[118,133],"proposed.":[123],"For":[124],"example,":[125],"we":[126,198],"found":[127],"previously":[130,206],"proposed":[131,208],"actually":[134],"results":[135],"in":[136],"slowdowns":[137],"up":[139],"30%":[141],"large":[143],"GPUs,":[144],"a":[147,215,223,238,275],"na\u00efve":[148],"baseline":[149,180,288],"system":[150],"forgoes":[152],"stealing":[154],"scopes.":[156],"Meanwhile,":[157],"DeNovo":[158],"shown":[161],"offer":[163],"efficient":[164],"an":[167],"performing":[173,233],"average":[175,285],"21%":[176],"better":[177],"than":[178],"our":[179],"system,":[181],"introduces":[184],"additional":[185],"overheads":[186],"maintain":[188],"ownership":[189,228,258],"all":[191,260],"modified":[192,261],"data.":[193,262],"To":[194],"these":[196],"deficiencies,":[197],"propose":[199],"adapt":[201],"lazy":[202,267],"release":[203,268],"consistency":[204,269],"-":[205,213,282],"only":[207,236],"homogeneous":[210],"CPU":[211],"systems":[212],"heterogeneous":[216],"system.":[217,289],"Our":[218,263],"approach,":[219],"called":[220],"hLRC,":[221],"uses":[222],"DeNovo-like":[224],"mechanism":[225],"track":[227],"variables,":[231],"lazily":[232],"actions":[235],"variable":[240],"changes":[241],"locations.":[242],"hLRC":[243],"allows":[244],"GPU":[245],"use":[248],"without":[256],"tracking":[257],"evaluation":[264],"shows":[265],"provides":[270],"robust":[271],"performance":[272],"improvement":[273],"across":[274],"set":[276],"work-stealing":[278],"graph":[279],"analysis":[280],"applications":[281],"29%":[283],"versus":[286]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":6}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
