{"id":"https://openalex.org/W4233930397","doi":"https://doi.org/10.1109/isca.2014.6853209","title":"Fine-grain task aggregation and coordination on GPUs","display_name":"Fine-grain task aggregation and coordination on GPUs","publication_year":2014,"publication_date":"2014-06-01","ids":{"openalex":"https://openalex.org/W4233930397","doi":"https://doi.org/10.1109/isca.2014.6853209"},"language":"en","primary_location":{"id":"doi:10.1109/isca.2014.6853209","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca.2014.6853209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 ACM/IEEE 41st International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003216606","display_name":"Marc S. Orr","orcid":null},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marc S. Orr","raw_affiliation_strings":["University of Wisconsin-Madison Computer Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison Computer Sciences","institution_ids":["https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077056823","display_name":"Bradford M. Beckmann","orcid":"https://orcid.org/0000-0002-5444-6521"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bradford M. Beckmann","raw_affiliation_strings":["AMD Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109010789","display_name":"Steven K. Reinhardt","orcid":"https://orcid.org/0000-0002-2479-0030"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Steven K. Reinhardt","raw_affiliation_strings":["AMD Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AMD Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075888890","display_name":"David A. Wood","orcid":"https://orcid.org/0000-0002-9748-8561"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David A. Wood","raw_affiliation_strings":["University of Wisconsin-Madison Computer Sciences"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison Computer Sciences","institution_ids":["https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2057,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.88534652,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"181","last_page":"192"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8915284276008606},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7487797141075134},{"id":"https://openalex.org/keywords/programmer","display_name":"Programmer","score":0.6270992755889893},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5301764011383057},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.5113453269004822},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.46646109223365784},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4153631031513214},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3628869652748108},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3440481722354889},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.28337180614471436},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1877918839454651}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8915284276008606},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7487797141075134},{"id":"https://openalex.org/C2778514511","wikidata":"https://www.wikidata.org/wiki/Q1374194","display_name":"Programmer","level":2,"score":0.6270992755889893},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5301764011383057},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.5113453269004822},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.46646109223365784},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4153631031513214},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3628869652748108},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3440481722354889},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.28337180614471436},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1877918839454651}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isca.2014.6853209","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca.2014.6853209","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 ACM/IEEE 41st International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1543144164","https://openalex.org/W1568192366","https://openalex.org/W1979527452","https://openalex.org/W1993301893","https://openalex.org/W1996511977","https://openalex.org/W2001488415","https://openalex.org/W2004937484","https://openalex.org/W2016706026","https://openalex.org/W2025516544","https://openalex.org/W2047434043","https://openalex.org/W2053776346","https://openalex.org/W2054397171","https://openalex.org/W2057363235","https://openalex.org/W2058790613","https://openalex.org/W2067313328","https://openalex.org/W2072725684","https://openalex.org/W2074812550","https://openalex.org/W2101209730","https://openalex.org/W2102922928","https://openalex.org/W2109051991","https://openalex.org/W2127945400","https://openalex.org/W2131296084","https://openalex.org/W2134765000","https://openalex.org/W2147657366","https://openalex.org/W2152812278","https://openalex.org/W2159952265","https://openalex.org/W2164391801","https://openalex.org/W2173213060","https://openalex.org/W3141954553","https://openalex.org/W6654630069","https://openalex.org/W6682960528","https://openalex.org/W6683813759"],"related_works":["https://openalex.org/W2328547842","https://openalex.org/W2226356789","https://openalex.org/W4312069186","https://openalex.org/W2085573357","https://openalex.org/W2143665235","https://openalex.org/W2031047992","https://openalex.org/W2094133266","https://openalex.org/W2806681060","https://openalex.org/W2165621019","https://openalex.org/W2142377409"],"abstract_inverted_index":{"In":[0],"general-purpose":[1],"graphics":[2],"processing":[3],"unit":[4],"(GPGPU)":[5],"computing,":[6],"data":[7,35],"is":[8],"processed":[9],"by":[10,103],"concurrent":[11],"threads":[12],"executing":[13],"the":[14,26,46,74,80,91,97,147],"same":[15],"function.":[16],"This":[17],"model,":[18],"dubbed":[19],"single-instruction/multiple-thread":[20],"(SIMT),":[21],"requires":[22],"programmers":[23],"to":[24,101,125],"coordinate":[25],"synchronous":[27],"execution":[28,128],"of":[29,34,82,152],"similar":[30],"operations":[31],"across":[32],"thousands":[33],"elements.":[36],"To":[37,67,78,110],"alleviate":[38],"this":[39,68],"programmer":[40],"burden,":[41],"Gaster":[42],"and":[43,72,132],"Howes":[44],"outlined":[45],"channel":[47,76],"abstraction,":[48],"which":[49],"facilitates":[50],"dynamically":[51],"aggregating":[52],"asynchronously":[53],"produced":[54],"fine-grain":[55],"work":[56],"into":[57],"coarser-grain":[58],"tasks.":[59],"However,":[60],"no":[61],"practical":[62],"implementation":[63],"has":[64],"been":[65],"proposed.":[66],"end,":[69],"we":[70,84,117,135],"propose":[71,118],"evaluate":[73],"first":[75],"implementation.":[77],"demonstrate":[79],"utility":[81],"channels,":[83],"present":[85],"a":[86,107,119],"case":[87],"study":[88],"that":[89,122,142],"maps":[90],"fine-grain,":[92],"recursive":[93],"task":[94],"spawning":[95],"in":[96,114],"Cilk":[98,138,143],"programming":[99],"language":[100],"channels":[102,131],"representing":[104],"it":[105],"as":[106,153,155],"flow":[108],"graph.":[109],"support":[111],"data-parallel":[112],"recursion":[113],"bounded":[115],"memory,":[116],"hardware":[120],"mechanism":[121],"allows":[123],"wavefronts":[124],"yield":[126],"their":[127],"resources.":[129],"Through":[130],"wavefront":[133],"yield,":[134],"implement":[136],"four":[137],"benchmarks.":[139],"We":[140],"show":[141],"can":[144],"scale":[145],"with":[146],"GPU":[148],"architecture,":[149],"achieving":[150],"speedups":[151],"much":[154],"4.3x":[156],"on":[157],"eight":[158],"compute":[159],"units.":[160]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
