{"id":"https://openalex.org/W2618092901","doi":"https://doi.org/10.1145/3070710","title":"Efficient Kernel Management on GPUs","display_name":"Efficient Kernel Management on GPUs","publication_year":2017,"publication_date":"2017-05-26","ids":{"openalex":"https://openalex.org/W2618092901","doi":"https://doi.org/10.1145/3070710","mag":"2618092901"},"language":"en","primary_location":{"id":"doi:10.1145/3070710","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3070710","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100604860","display_name":"Yun Liang","orcid":"https://orcid.org/0000-0002-9076-7998"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yun Liang","raw_affiliation_strings":["Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100663885","display_name":"Xiuhong Li","orcid":"https://orcid.org/0000-0002-5327-0907"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiuhong Li","raw_affiliation_strings":["Peking University, China"],"affiliations":[{"raw_affiliation_string":"Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100604860"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.0278,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.87903322,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"16","issue":"4","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9021775126457214},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6721755862236023},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.588995099067688},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.468420147895813},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.354594349861145}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9021775126457214},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6721755862236023},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.588995099067688},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.468420147895813},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.354594349861145},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3070710","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3070710","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1968047430","https://openalex.org/W1968391520","https://openalex.org/W1972077953","https://openalex.org/W1979527452","https://openalex.org/W1982996921","https://openalex.org/W1983235612","https://openalex.org/W1997162567","https://openalex.org/W1998886328","https://openalex.org/W2020572638","https://openalex.org/W2027806965","https://openalex.org/W2043420024","https://openalex.org/W2047060659","https://openalex.org/W2048441570","https://openalex.org/W2062527253","https://openalex.org/W2079038734","https://openalex.org/W2080592089","https://openalex.org/W2084110734","https://openalex.org/W2084309410","https://openalex.org/W2090584832","https://openalex.org/W2093043622","https://openalex.org/W2097643185","https://openalex.org/W2098274770","https://openalex.org/W2098505406","https://openalex.org/W2100409742","https://openalex.org/W2106018915","https://openalex.org/W2126830109","https://openalex.org/W2142444503","https://openalex.org/W2150851481","https://openalex.org/W2152517358","https://openalex.org/W2152956697","https://openalex.org/W2155568054","https://openalex.org/W2160428323","https://openalex.org/W2166918318","https://openalex.org/W2232645663","https://openalex.org/W2346977986","https://openalex.org/W2403204284","https://openalex.org/W2566334102","https://openalex.org/W2581065617","https://openalex.org/W4232385523","https://openalex.org/W4235295270","https://openalex.org/W4235366964","https://openalex.org/W4237024478","https://openalex.org/W4239017056","https://openalex.org/W4239965559"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2530322880","https://openalex.org/W1596801655"],"abstract_inverted_index":{"Graphics":[0],"Processing":[1],"Units":[2],"(GPUs)":[3],"have":[4,54],"been":[5,37,55],"widely":[6],"adopted":[7],"as":[8,101],"accelerators":[9],"for":[10,90,115,134,162],"compute-intensive":[11],"applications":[12,27],"due":[13],"to":[14,29,46,64,80,84,158,186,200],"its":[15,48],"tremendous":[16],"computational":[17],"power":[18],"and":[19,43,110,150,177],"high":[20],"memory":[21],"bandwidth.":[22],"As":[23],"the":[24,66,86,106,112,116,132,160,163,188,202,213,228],"complexity":[25],"of":[26,34,169,196],"continues":[28],"grow,":[30],"each":[31],"new":[32],"generation":[33],"GPUs":[35,53,89],"has":[36],"equipped":[38],"with":[39,57,227],"advanced":[40],"architectural":[41],"features":[42],"more":[44],"resources":[45,87],"sustain":[47],"performance":[49,99,133,214],"acceleration":[50],"capability.":[51],"Recent":[52],"featured":[56],"concurrent":[58,91,135,230],"kernel":[59,92,127,136,141,172,231],"execution,":[60],"which":[61],"is":[62,76,156,184],"designed":[63],"improve":[65,212],"resource":[67,113],"utilization":[68],"by":[69,191,215,221],"executing":[70,118,165],"multiple":[71],"kernels":[72],"simultaneously.":[73],"However,":[74],"it":[75],"still":[77],"a":[78,82,194,197],"challenge":[79],"find":[81],"way":[83],"manage":[85],"on":[88,138,218,224],"execution.":[93],"Prior":[94],"works":[95],"only":[96,192],"achieve":[97],"limited":[98],"improvement":[100],"they":[102],"do":[103],"not":[104],"optimize":[105],"thread-level":[107],"parallelism":[108],"(TLP)":[109],"model":[111],"contention":[114,190],"concurrently":[117,164],"kernels.":[119,166],"In":[120],"this":[121],"article,":[122],"we":[123],"design":[124],"an":[125],"efficient":[126],"management":[128,142],"framework":[129,143,210],"that":[130,208],"optimizes":[131],"execution":[137,232],"GPUs.":[139],"Our":[140],"contains":[144],"two":[145],"key":[146],"components:":[147],"TLP":[148,154,161,175,179],"modulation":[149,155],"cache":[151,182,189],"bypassing.":[152],"The":[153,181],"employed":[157],"adjust":[159],"It":[167],"consists":[168],"three":[170],"parts:":[171],"categorization,":[173],"static":[174],"modulation,":[176],"dynamic":[178],"modulation.":[180],"bypassing":[183],"proposed":[185],"mitigate":[187],"allowing":[193],"subset":[195],"kernel\u2019s":[198],"blocks":[199],"access":[201],"L1":[203],"data":[204],"cache.":[205],"Experiments":[206],"indicate":[207],"our":[209],"can":[211],"1.51":[216],"\u00d7":[217,223],"average":[219],"(energy-efficiency":[220],"1.39":[222],"average),":[225],"compared":[226],"default":[229],"framework.":[233]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
