{"id":"https://openalex.org/W4231595696","doi":"https://doi.org/10.1109/micro.2016.7783716","title":"KLAP: Kernel launch aggregation and promotion for optimizing dynamic parallelism","display_name":"KLAP: Kernel launch aggregation and promotion for optimizing dynamic parallelism","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W4231595696","doi":"https://doi.org/10.1109/micro.2016.7783716"},"language":"en","primary_location":{"id":"doi:10.1109/micro.2016.7783716","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076404852","display_name":"Izzat El Hajj","orcid":"https://orcid.org/0000-0003-3356-6898"},"institutions":[{"id":"https://openalex.org/I16820183","display_name":"Illinois College","ror":"https://ror.org/02ys5x139","country_code":"US","type":"education","lineage":["https://openalex.org/I16820183"]},{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Izzat El Hajj","raw_affiliation_strings":["University of Illinois College of Law, Champaign, IL, US"],"affiliations":[{"raw_affiliation_string":"University of Illinois College of Law, Champaign, IL, US","institution_ids":["https://openalex.org/I157725225","https://openalex.org/I16820183"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044416322","display_name":"Juan G\u00f3mez-Luna","orcid":"https://orcid.org/0000-0002-6514-1571"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juan Gomez-Luna","raw_affiliation_strings":["Universidad de C??rdoba"],"affiliations":[{"raw_affiliation_string":"Universidad de C??rdoba","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354228","display_name":"Cheng Li","orcid":"https://orcid.org/0000-0001-7522-7072"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cheng Li","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043931807","display_name":"Li\u2010Wen Chang","orcid":"https://orcid.org/0000-0001-6515-6733"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Li-Wen Chang","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027761275","display_name":"Dejan Miloji\u010di\u0107","orcid":"https://orcid.org/0000-0001-9830-8588"},"institutions":[{"id":"https://openalex.org/I1324840837","display_name":"Hewlett-Packard (United States)","ror":"https://ror.org/059rn9488","country_code":"US","type":"company","lineage":["https://openalex.org/I1324840837"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dejan Milojicic","raw_affiliation_strings":["Hewlett-Packard Labs"],"affiliations":[{"raw_affiliation_string":"Hewlett-Packard Labs","institution_ids":["https://openalex.org/I1324840837"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040404999","display_name":"Wen\u2010mei Hwu","orcid":"https://orcid.org/0000-0003-2532-5349"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wen-mei Hwu","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5076404852"],"corresponding_institution_ids":["https://openalex.org/I16820183","https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":2.31397309,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.90499654,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8198003768920898},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7725591659545898},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7029599547386169},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5291536450386047},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5281811952590942},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5121145248413086},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4394334852695465},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.17176580429077148},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07497790455818176}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8198003768920898},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7725591659545898},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7029599547386169},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5291536450386047},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5281811952590942},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5121145248413086},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4394334852695465},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.17176580429077148},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07497790455818176},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/micro.2016.7783716","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783716","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W99156441","https://openalex.org/W1964704819","https://openalex.org/W1991592471","https://openalex.org/W1997162567","https://openalex.org/W2029940394","https://openalex.org/W2044715502","https://openalex.org/W2089672258","https://openalex.org/W2090278477","https://openalex.org/W2106329447","https://openalex.org/W2119222363","https://openalex.org/W2134427337","https://openalex.org/W2143798346","https://openalex.org/W2149234156","https://openalex.org/W2156898111","https://openalex.org/W2165633563","https://openalex.org/W2238700765","https://openalex.org/W2260499435","https://openalex.org/W2279753750","https://openalex.org/W2294241027","https://openalex.org/W2511683765","https://openalex.org/W2604770745","https://openalex.org/W3103903655","https://openalex.org/W4233930397","https://openalex.org/W4236883517","https://openalex.org/W4251164127","https://openalex.org/W4252176599","https://openalex.org/W6604101874"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2027972911","https://openalex.org/W2157978810","https://openalex.org/W2778498407","https://openalex.org/W1966837078"],"abstract_inverted_index":{"Dynamic":[0],"parallelism":[1,29,130],"on":[2],"GPUs":[3,23],"simplifies":[4],"the":[5,33,71,88,101,109,143],"programming":[6],"of":[7,10,40,46,66,73,104,111,124,145,167,193],"many":[8],"classes":[9],"applications":[11],"that":[12,69,158,176,182],"generate":[13],"paral-lelizable":[14],"work":[15,112],"not":[16,26,184],"known":[17],"prior":[18],"to":[19,32,115,127,136],"execution.":[20],"However,":[21],"modern":[22],"architectures":[24],"do":[25],"support":[27],"dynamic":[28,47,77,171],"efficiently":[30],"due":[31],"high":[34],"kernel":[35,93,114,138,159,177],"launch":[36,80,119,123,160,178],"overhead,":[37],"limited":[38,44,146],"number":[39,103],"simultaneous":[41],"kernels,":[42],"and":[43,61,107,133,135,156],"depth":[45],"calls":[48],"a":[49,64,95,153,163,190],"device":[50],"can":[51],"support.":[52],"In":[53],"this":[54],"paper,":[55],"we":[56],"propose":[57],"Kernel":[58,79,118],"Launch":[59],"Aggregation":[60],"Promotion":[62],"(KLAP),":[63],"set":[65],"compiler":[67,155],"techniques":[68,151],"improve":[70,116],"performance":[72],"kernels":[74,83,105,126],"which":[75],"use":[76],"parallelism.":[78,172],"aggregation":[81,161],"fuses":[82],"launched":[84],"by":[85,189],"threads":[86],"in":[87,152],"same":[89],"warp,":[90],"block,":[91],"or":[92],"into":[94],"single":[96],"aggregated":[97],"kernel,":[98],"thereby":[99],"reducing":[100],"total":[102],"spawned":[106],"increasing":[108],"amount":[110],"per":[113],"occupancy.":[117],"promotion":[120,179],"enables":[121,180],"early":[122],"child":[125],"extract":[128],"more":[129],"between":[131],"parents":[132],"children,":[134],"aggregate":[137],"launches":[139],"across":[140],"generations":[141],"mitigating":[142],"problem":[144],"depth.":[147],"We":[148,173],"implement":[149],"our":[150],"real":[154],"show":[157,175],"obtains":[162],"geometric":[164,191],"mean":[165,192],"speedup":[166],"6.58x":[168],"over":[169],"regular":[170],"also":[174],"cases":[181],"were":[183],"originally":[185],"possible,":[186],"improving":[187],"throughput":[188],"30.44":[194],"x.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
