{"id":"https://openalex.org/W3043492469","doi":"https://doi.org/10.1109/isca45697.2020.00087","title":"Independent Forward Progress of Work-groups","display_name":"Independent Forward Progress of Work-groups","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W3043492469","doi":"https://doi.org/10.1109/isca45697.2020.00087","mag":"3043492469"},"language":"en","primary_location":{"id":"doi:10.1109/isca45697.2020.00087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca45697.2020.00087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057081108","display_name":"Alexandru Du\u0163u","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Alexandru Du\u0163u","raw_affiliation_strings":["AMD Research"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047054160","display_name":"Matthew D. Sinclair","orcid":"https://orcid.org/0000-0003-0189-7895"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthew D. Sinclair","raw_affiliation_strings":["AMD Research"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077056823","display_name":"Bradford M. Beckmann","orcid":"https://orcid.org/0000-0002-5444-6521"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bradford M. Beckmann","raw_affiliation_strings":["AMD Research"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075888890","display_name":"David A. Wood","orcid":"https://orcid.org/0000-0002-9748-8561"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"David A. Wood","raw_affiliation_strings":["AMD Research"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011003147","display_name":"Marcus Chow","orcid":"https://orcid.org/0000-0002-2577-8914"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marcus Chow","raw_affiliation_strings":["AMD Research"],"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5057081108"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.1552,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.76727642,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1022","last_page":"1035"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8381654024124146},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.631301760673523},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.629541277885437},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.6294999122619629},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5557422637939453},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.5282613039016724},{"id":"https://openalex.org/keywords/context-switch","display_name":"Context switch","score":0.518186628818512},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.43194475769996643},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.42087480425834656},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.27664321660995483},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.14328235387802124},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11784562468528748},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.10690227150917053}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8381654024124146},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.631301760673523},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.629541277885437},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.6294999122619629},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5557422637939453},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.5282613039016724},{"id":"https://openalex.org/C53833338","wikidata":"https://www.wikidata.org/wiki/Q1061424","display_name":"Context switch","level":2,"score":0.518186628818512},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.43194475769996643},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.42087480425834656},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.27664321660995483},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.14328235387802124},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11784562468528748},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.10690227150917053},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isca45697.2020.00087","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca45697.2020.00087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.4099999964237213,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W1897254069","https://openalex.org/W2022185456","https://openalex.org/W2032309817","https://openalex.org/W2046750610","https://openalex.org/W2052207834","https://openalex.org/W2057363235","https://openalex.org/W2081040934","https://openalex.org/W2090278477","https://openalex.org/W2097643185","https://openalex.org/W2099865945","https://openalex.org/W2101209730","https://openalex.org/W2103772475","https://openalex.org/W2104489510","https://openalex.org/W2117689653","https://openalex.org/W2125551452","https://openalex.org/W2136572251","https://openalex.org/W2143700077","https://openalex.org/W2150618958","https://openalex.org/W2153636750","https://openalex.org/W2159826908","https://openalex.org/W2169706611","https://openalex.org/W2170582961","https://openalex.org/W2235785597","https://openalex.org/W2236227338","https://openalex.org/W2238700765","https://openalex.org/W2238992335","https://openalex.org/W2294911930","https://openalex.org/W2323693848","https://openalex.org/W2335888987","https://openalex.org/W2399715892","https://openalex.org/W2474388053","https://openalex.org/W2474844316","https://openalex.org/W2517837804","https://openalex.org/W2528784626","https://openalex.org/W2535050116","https://openalex.org/W2554192763","https://openalex.org/W2563372635","https://openalex.org/W2566334102","https://openalex.org/W2567317362","https://openalex.org/W2581065617","https://openalex.org/W2591927543","https://openalex.org/W2593535610","https://openalex.org/W2622728018","https://openalex.org/W2626811727","https://openalex.org/W2765329037","https://openalex.org/W2774005267","https://openalex.org/W2787181861","https://openalex.org/W2791833900","https://openalex.org/W2886199004","https://openalex.org/W2897849877","https://openalex.org/W2963723139","https://openalex.org/W2964281804","https://openalex.org/W3006138624","https://openalex.org/W4231595696","https://openalex.org/W4233930397","https://openalex.org/W4236713805","https://openalex.org/W4236786453","https://openalex.org/W4239826705","https://openalex.org/W4239965559","https://openalex.org/W4241667468","https://openalex.org/W4243825971","https://openalex.org/W4255847023","https://openalex.org/W4385489926","https://openalex.org/W6639801777","https://openalex.org/W6677381316","https://openalex.org/W6721281333","https://openalex.org/W6734815144","https://openalex.org/W6748515141","https://openalex.org/W6753650927"],"related_works":["https://openalex.org/W2931688134","https://openalex.org/W2377919138","https://openalex.org/W2378857091","https://openalex.org/W103652678","https://openalex.org/W4226090359","https://openalex.org/W2059697060","https://openalex.org/W936373746","https://openalex.org/W2975817033","https://openalex.org/W4256502920","https://openalex.org/W4382701072"],"abstract_inverted_index":{"GPUs":[0,25],"have":[1],"evolved":[2],"from":[3,154],"providing":[4],"highly-constrained":[5],"programmability":[6],"for":[7,19,32,73,114,189,197],"a":[8,103,164,177],"single":[9],"kernel":[10],"to":[11,14,38,64,85,159,225],"using":[12],"pre-emption":[13],"ensure":[15,28],"independent":[16,29,43,119],"forward":[17,30,44,120],"progress":[18,31,45,121],"concurrently":[20],"executing":[21],"kernels.":[22],"However,":[23],"modern":[24],"do":[26,208],"not":[27,209],"kernels":[33,53],"that":[34,68,117,191,207],"use":[35,192],"fine-grain":[36,126],"synchronization":[37,128],"coordinate":[39],"inter-work-group":[40],"execution.":[41],"Enabling":[42],"among":[46],"work-groups":[47],"(WGs)":[48],"is":[49],"challenging":[50],"as":[51,216],"pre-empted":[52],"may":[54],"be":[55],"rescheduled":[56],"with":[57,141],"fewer":[58],"hardware":[59,71,107,112,170],"resources.":[60,171],"This":[61],"can":[62],"lead":[63],"oversubscribed":[65,183,213],"execution":[66],"scenarios":[67],"deadlock":[69,210],"current":[70],"even":[72],"correctly":[74],"written":[75],"code.":[76],"Prior":[77],"work":[78],"addresses":[79],"this":[80],"problem":[81],"by":[82,185,227],"requiring":[83],"programmers":[84],"specify":[86],"resource":[87,92,131],"requirements":[88],"and":[89,98,129,156,168,195,229],"assuming":[90],"static":[91],"allocation,":[93],"which":[94],"adds":[95],"scheduling":[96],"constraints":[97],"reduces":[99],"portability.":[100],"We":[101],"propose":[102,135],"family":[104],"of":[105,125],"novel":[106],"approaches":[108],"-":[109,116],"trading":[110],"off":[111],"complexity":[113],"performance":[115],"provide":[118],"in":[122,182,211],"the":[123,212],"presence":[124],"inter-WG":[127],"dynamic":[130],"allocation.":[132],"Additionally,":[133],"we":[134],"new":[136],"waiting":[137,157],"atomic":[138],"instructions":[139],"compatible":[140],"proposed":[142],"C++":[143],"20":[144],"extensions.":[145],"Our":[146],"final":[147],"design,":[148],"Autonomous":[149],"Work-Groups":[150],"(AWG),":[151],"uses":[152],"hints":[153],"regular":[155],"atomics":[158],"cooperatively":[160],"schedule":[161],"WGs":[162],"within":[163],"kernel,":[165],"improving":[166],"efficiency":[167],"virtualizing":[169],"In":[172],"non-oversubscribed":[173],"scenarios,":[174],"AWG":[175,203],"outperforms":[176,204],"busy-waiting":[178],"baseline":[179],"(which":[180],"deadlocks":[181],"scenarios)":[184],"12\u00d7":[186],"on":[187],"average":[188],"benchmarks":[190],"different":[193],"mutexes":[194],"barriers":[196],"fine-grained,":[198],"WG":[199],"granularity":[200],"synchronization.":[201],"Furthermore,":[202],"other":[205],"solutions":[206],"case,":[214],"such":[215],"fixed-interval":[217],"round-robin":[218],"context":[219],"switching":[220],"or":[221],"naively":[222],"extending":[223],"monitor/mwait":[224],"GPUs,":[226],"2.6\u00d7":[228],"2.2\u00d7,":[230],"respectively.":[231]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
