{"id":"https://openalex.org/W4403296910","doi":"https://doi.org/10.1109/tc.2024.3477995","title":"Adaptive Kernel Fusion for Improving the GPU Utilization While Ensuring\u00a0QoS","display_name":"Adaptive Kernel Fusion for Improving the GPU Utilization While Ensuring\u00a0QoS","publication_year":2024,"publication_date":"2024-10-10","ids":{"openalex":"https://openalex.org/W4403296910","doi":"https://doi.org/10.1109/tc.2024.3477995"},"language":"en","primary_location":{"id":"doi:10.1109/tc.2024.3477995","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3477995","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063596525","display_name":"Han Zhao","orcid":"https://orcid.org/0000-0002-1561-5329"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Han Zhao","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Department of Computer Science and EngineeringShanghai Jiao Tong University"],"raw_orcid":"https://orcid.org/0000-0002-1561-5329","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and EngineeringShanghai Jiao Tong University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113635026","display_name":"J Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junxiao Deng","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Department of Computer Science and EngineeringShanghai Jiao Tong University"],"raw_orcid":"https://orcid.org/0009-0003-1239-5134","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and EngineeringShanghai Jiao Tong University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008837660","display_name":"Weihao Cui","orcid":"https://orcid.org/0000-0002-6646-5260"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Cui","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Department of Computer Science and EngineeringShanghai Jiao Tong University"],"raw_orcid":"https://orcid.org/0000-0002-6646-5260","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and EngineeringShanghai Jiao Tong University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Department of Computer Science and EngineeringShanghai Jiao Tong University"],"raw_orcid":"https://orcid.org/0000-0001-5832-0347","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and EngineeringShanghai Jiao Tong University","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026996875","display_name":"Youtao Zhang","orcid":"https://orcid.org/0000-0001-8425-8743"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]},{"id":"https://openalex.org/I4210156583","display_name":"Laboratoire d'Informatique de Paris-Nord","ror":"https://ror.org/05g1zjw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I4210091279","https://openalex.org/I4210156583","https://openalex.org/I4210159245"]}],"countries":["FR","US"],"is_corresponding":false,"raw_author_name":"Youtao Zhang","raw_affiliation_strings":["Computer Science Department, University of Pittsburgh, Pittsburgh, PA, USA","Computer Science DepartmentUniversity of Pittsburgh"],"raw_orcid":"https://orcid.org/0000-0001-8425-8743","affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Pittsburgh, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"Computer Science DepartmentUniversity of Pittsburgh","institution_ids":["https://openalex.org/I4210156583","https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077093242","display_name":"Deze Zeng","orcid":"https://orcid.org/0000-0003-3276-1202"},"institutions":[{"id":"https://openalex.org/I3124059619","display_name":"China University of Geosciences","ror":"https://ror.org/04gcegc37","country_code":"CN","type":"education","lineage":["https://openalex.org/I3124059619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deze Zeng","raw_affiliation_strings":["School of Computer Science, China University of Geosciences, Wuhan, China","School of Computer ScienceChina University of Geosciences"],"raw_orcid":"https://orcid.org/0000-0003-3276-1202","affiliations":[{"raw_affiliation_string":"School of Computer Science, China University of Geosciences, Wuhan, China","institution_ids":["https://openalex.org/I3124059619"]},{"raw_affiliation_string":"School of Computer ScienceChina University of Geosciences","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039318240","display_name":"Minyi Guo","orcid":"https://orcid.org/0000-0003-0034-2302"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Department of Computer Science and EngineeringShanghai Jiao Tong University"],"raw_orcid":"https://orcid.org/0000-0003-0034-2302","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Department of Computer Science and EngineeringShanghai Jiao Tong University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5063596525"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.2353,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.56740741,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"74","issue":"2","first_page":"386","last_page":"400"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.8616999983787537,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.8616999983787537,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13717","display_name":"Advanced Algorithms and Applications","score":0.7615000009536743,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.7390000224113464,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7150735855102539},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5442668199539185},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5303139090538025},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.43118345737457275},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0898796021938324}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7150735855102539},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5442668199539185},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5303139090538025},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.43118345737457275},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0898796021938324},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tc.2024.3477995","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tc.2024.3477995","pdf_url":null,"source":{"id":"https://openalex.org/S157670870","display_name":"IEEE Transactions on Computers","issn_l":"0018-9340","issn":["0018-9340","1557-9956","2326-3814"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.800000011920929}],"awards":[{"id":"https://openalex.org/G1702756715","display_name":null,"funder_award_id":"2022YFB4501400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G2465329715","display_name":null,"funder_award_id":"61832006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2857700228","display_name":null,"funder_award_id":"62232011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7352368754","display_name":null,"funder_award_id":"62302302","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7942366687","display_name":null,"funder_award_id":"62022057","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1973573211","https://openalex.org/W2038666141","https://openalex.org/W2072652898","https://openalex.org/W2079038734","https://openalex.org/W2080592089","https://openalex.org/W2130336316","https://openalex.org/W2134427337","https://openalex.org/W2142801765","https://openalex.org/W2194775991","https://openalex.org/W2294600019","https://openalex.org/W2323693848","https://openalex.org/W2604514113","https://openalex.org/W2625231790","https://openalex.org/W2793085784","https://openalex.org/W2794670651","https://openalex.org/W2795046307","https://openalex.org/W2795326697","https://openalex.org/W2982157693","https://openalex.org/W3005664618","https://openalex.org/W3011377185","https://openalex.org/W3043571714","https://openalex.org/W3208777667","https://openalex.org/W4206933065","https://openalex.org/W4231332361","https://openalex.org/W4250180141","https://openalex.org/W4280633999","https://openalex.org/W4312547955","https://openalex.org/W4396686649","https://openalex.org/W6604681715","https://openalex.org/W6637373629","https://openalex.org/W6694513646","https://openalex.org/W6779103662","https://openalex.org/W6784999070","https://openalex.org/W6801004768"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032","https://openalex.org/W2382290278"],"abstract_inverted_index":{"The":[0,122,135],"prosperity":[1],"of":[2,11,95,100,177,210,227,242],"machine":[3],"learning":[4],"applications":[5,229],"has":[6],"promoted":[7],"the":[8,30,59,68,88,92,126,158,170,175,178,182,188,194,198,202,207,215,225,240],"rapid":[9],"development":[10],"GPU":[12,31],"architecture.":[13],"It":[14],"continues":[15],"to":[16,36,83,213],"integrate":[17],"more":[18],"CUDA":[19,144,149,154],"Cores,":[20],"larger":[21],"L2":[22],"cache":[23],"and":[24,62,80,116,128,143,152,181],"memory":[25],"bandwidth":[26],"within":[27,67],"SM.":[28,69],"Moreover,":[29],"integrates":[32],"Tensor":[33,140],"Core":[34,141,145,150,155],"dedicated":[35],"matrix":[37],"multiplication.":[38],"Although":[39],"studies":[40],"have":[41],"shown":[42],"that":[43,222],"task":[44],"co-location":[45],"could":[46,138],"effectively":[47],"improve":[48,64,84,214],"system":[49,216],"throughput,":[50],"existing":[51],"works":[52],"only":[53],"focus":[54],"on":[55,206,236],"resource":[56,65,85],"scheduling":[57,81],"at":[58],"SM":[60,89],"level":[61],"cannot":[63],"utilization":[66,86],"In":[70],"this":[71],"paper,":[72],"we":[73],"propose":[74],"Aker,":[75],"a":[76,101,105,132,167],"static":[77,102,127],"kernel":[78,103,114,120,123,133,136,142,151,159,164,168,185,191,195,200,204],"fusion":[79,130],"approach":[82],"inside":[87],"while":[90,238],"ensuring":[91,239],"QoS":[93,208,241],"(Quality-of-Service)":[94],"co-located":[96],"tasks.":[97,244],"Aker":[98,223],"consists":[99],"fuser,":[104],"duration":[106,171,176],"predictor":[107,172],"for":[108,131,166],"fused":[109,113,163,179,184,190,199],"kernels,":[110],"an":[111,117],"adaptive":[112,183],"selector,":[115],"enhanced":[118],"QoS-aware":[119],"manager.":[121],"fuser":[124,160],"enables":[125],"flexible":[129],"pair.":[134],"pair":[137],"be":[139],"kernel,":[146],"or":[147,201],"computing-prefer":[148],"memory-prefer":[153],"kernel.":[156],"After":[157],"provides":[161],"multiple":[162],"versions":[165],"pair,":[169],"precisely":[173],"predicts":[174],"kernels":[180],"selector":[186],"locates":[187],"optimal":[189],"version.":[192],"Finally,":[193],"manager":[196],"invokes":[197],"original":[203],"based":[205],"headroom":[209],"latency-critical":[211,243],"tasks":[212],"throughput.":[217],"Our":[218],"experimental":[219],"results":[220],"show":[221],"improves":[224],"throughput":[226],"best-effort":[228],"compared":[230],"with":[231],"state-of-the-art":[232],"solutions":[233],"by":[234],"50.1%":[235],"average,":[237]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
