{"id":"https://openalex.org/W3057934400","doi":"https://doi.org/10.1145/3406538","title":"Inter-kernel Reuse-aware Thread Block Scheduling","display_name":"Inter-kernel Reuse-aware Thread Block Scheduling","publication_year":2020,"publication_date":"2020-08-17","ids":{"openalex":"https://openalex.org/W3057934400","doi":"https://doi.org/10.1145/3406538","mag":"3057934400"},"language":"en","primary_location":{"id":"doi:10.1145/3406538","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3406538","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3406538","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3406538","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083833949","display_name":"Muhammad Huzaifa","orcid":"https://orcid.org/0000-0001-9245-527X"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Muhammad Huzaifa","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075312447","display_name":"Johnathan Alsop","orcid":"https://orcid.org/0000-0001-5272-2396"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Johnathan Alsop","raw_affiliation_strings":["AMD Research, Bellevue, WA, USA"],"affiliations":[{"raw_affiliation_string":"AMD Research, Bellevue, WA, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069728263","display_name":"Abdulrahman Mahmoud","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Abdulrahman Mahmoud","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038708856","display_name":"Giordano Salvador","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giordano Salvador","raw_affiliation_strings":["Unaffiliated"],"affiliations":[{"raw_affiliation_string":"Unaffiliated","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047054160","display_name":"Matthew D. Sinclair","orcid":"https://orcid.org/0000-0003-0189-7895"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]},{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew D. Sinclair","raw_affiliation_strings":["University of Wisconsin-Madison, USA and AMD Research, Bellevue, WA, USA"],"affiliations":[{"raw_affiliation_string":"University of Wisconsin-Madison, USA and AMD Research, Bellevue, WA, USA","institution_ids":["https://openalex.org/I4210108985","https://openalex.org/I135310074"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086111967","display_name":"Sarita V. Adve","orcid":"https://orcid.org/0000-0002-3403-5119"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sarita V. Adve","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5083833949"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":2.8287,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.90807839,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"17","issue":"3","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8864461183547974},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7144391536712646},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.6572351455688477},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6356099843978882},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5740349888801575},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5110455751419067},{"id":"https://openalex.org/keywords/linux-kernel","display_name":"Linux kernel","score":0.4425339102745056},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.42483046650886536},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3993558883666992},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.37865114212036133}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8864461183547974},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7144391536712646},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.6572351455688477},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6356099843978882},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5740349888801575},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5110455751419067},{"id":"https://openalex.org/C553261973","wikidata":"https://www.wikidata.org/wiki/Q14579","display_name":"Linux kernel","level":2,"score":0.4425339102745056},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.42483046650886536},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3993558883666992},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.37865114212036133},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3406538","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3406538","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3406538","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3406538","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3406538","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3406538","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8999999761581421,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[{"id":"https://openalex.org/G316126018","display_name":null,"funder_award_id":"CCF 13-02641,CCF 16-19245","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3191114552","display_name":null,"funder_award_id":"CCF 16-19245","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4184137766","display_name":null,"funder_award_id":"CCF 13-02641","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4887462507","display_name":null,"funder_award_id":"C-FAR","funder_id":"https://openalex.org/F4320306087","funder_display_name":"Semiconductor Research Corporation"},{"id":"https://openalex.org/G5123864600","display_name":null,"funder_award_id":"CCF 13-02641 and CCF 16-19245","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5613224033","display_name":null,"funder_award_id":"STARnet","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G6811296919","display_name":null,"funder_award_id":"C-FAR","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G6894402473","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3057934400.pdf","grobid_xml":"https://content.openalex.org/works/W3057934400.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1968391520","https://openalex.org/W1997352364","https://openalex.org/W2008115889","https://openalex.org/W2021211271","https://openalex.org/W2024122052","https://openalex.org/W2027806965","https://openalex.org/W2035080386","https://openalex.org/W2062527253","https://openalex.org/W2067441262","https://openalex.org/W2079038734","https://openalex.org/W2093043622","https://openalex.org/W2152956697","https://openalex.org/W2164264749","https://openalex.org/W2179783112","https://openalex.org/W2319071579","https://openalex.org/W2511683765","https://openalex.org/W2521461609","https://openalex.org/W2605251767","https://openalex.org/W2605751925","https://openalex.org/W2626312854","https://openalex.org/W2765329037","https://openalex.org/W2883882491","https://openalex.org/W2883929540","https://openalex.org/W2902943377","https://openalex.org/W3096414051","https://openalex.org/W4232973962","https://openalex.org/W4247783737","https://openalex.org/W4252742548"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1001352512","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W2748922771","https://openalex.org/W1981130628"],"abstract_inverted_index":{"As":[0],"GPUs":[1],"have":[2,11,99],"become":[3],"more":[4],"programmable,":[5],"their":[6],"performance":[7,38],"and":[8,30,39,98,117,121,127,129],"energy":[9,118],"benefits":[10],"made":[12],"them":[13],"increasingly":[14,35],"popular.":[15],"However,":[16],"while":[17,85],"GPU":[18,42,54,61],"compute":[19],"units":[20],"continue":[21],"to":[22,69,89,104],"improve":[23],"in":[24,37,53,124,132],"performance,":[25],"on-chip":[26],"memories":[27],"lag":[28],"behind":[29],"data":[31,51],"accesses":[32],"are":[33,65,95],"becoming":[34],"expensive":[36],"energy.":[40],"Emerging":[41],"coherence":[43],"protocols":[44],"can":[45],"mitigate":[46],"this":[47],"bottleneck":[48],"by":[49,119],"exploiting":[50],"reuse":[52,84],"caches":[55],"across":[56],"kernel":[57],"boundaries.":[58],"Unfortunately,":[59],"current":[60],"thread":[62,78],"block":[63,79],"schedulers":[64,80,94],"typically":[66],"not":[67],"designed":[68],"expose":[70],"such":[71],"reuse.":[72],"This":[73],"article":[74],"proposes":[75],"new":[76],"hardware":[77],"that":[81],"optimize":[82],"inter-kernel":[83],"using":[86],"work":[87],"stealing":[88],"preserve":[90],"load":[91],"balance.":[92],"Our":[93],"simple,":[96],"decentralized,":[97],"extremely":[100],"low":[101],"overhead.":[102],"Compared":[103],"a":[105],"baseline":[106],"round-robin":[107],"scheduler,":[108],"the":[109],"best":[110],"performing":[111],"scheduler":[112],"reduces":[113],"average":[114],"execution":[115],"time":[116],"19%":[120],"11%,":[122],"respectively,":[123,131],"regular":[125],"applications,":[126],"10%":[128],"8%,":[130],"irregular":[133],"applications.":[134]},"counts_by_year":[{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
