{"id":"https://openalex.org/W7154957895","doi":"https://doi.org/10.1109/tcc.2026.3685862","title":"HASE: Hardware-Aware Scheduling for Inference Tasks in Heterogeneous GPU Clusters","display_name":"HASE: Hardware-Aware Scheduling for Inference Tasks in Heterogeneous GPU Clusters","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7154957895","doi":"https://doi.org/10.1109/tcc.2026.3685862"},"language":null,"primary_location":{"id":"doi:10.1109/tcc.2026.3685862","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tcc.2026.3685862","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/tcc.2026.3685862","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100674347","display_name":"Yiqun Chen","orcid":"https://orcid.org/0000-0003-0672-9217"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanqi Chen","raw_affiliation_strings":["School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036326577","display_name":"Congfeng Jiang","orcid":"https://orcid.org/0000-0003-3592-0328"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Congfeng Jiang","raw_affiliation_strings":["School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-3592-0328","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048629905","display_name":"Chunpeng Wu","orcid":"https://orcid.org/0000-0002-3970-8570"},"institutions":[{"id":"https://openalex.org/I153473198","display_name":"North China Electric Power University","ror":"https://ror.org/04qr5t414","country_code":"CN","type":"education","lineage":["https://openalex.org/I153473198"]},{"id":"https://openalex.org/I4392738113","display_name":"China Electric Power Research Institute","ror":"https://ror.org/05ehpzy81","country_code":null,"type":"facility","lineage":["https://openalex.org/I17442442","https://openalex.org/I4392738113"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunpeng Wu","raw_affiliation_strings":["China Electric Power Research Institute, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Electric Power Research Institute, Beijing, China","institution_ids":["https://openalex.org/I153473198","https://openalex.org/I4392738113"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134014824","display_name":"Yue Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I153473198","display_name":"North China Electric Power University","ror":"https://ror.org/04qr5t414","country_code":"CN","type":"education","lineage":["https://openalex.org/I153473198"]},{"id":"https://openalex.org/I4392738113","display_name":"China Electric Power Research Institute","ror":"https://ror.org/05ehpzy81","country_code":null,"type":"facility","lineage":["https://openalex.org/I17442442","https://openalex.org/I4392738113"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Wang","raw_affiliation_strings":["China Electric Power Research Institute, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Electric Power Research Institute, Beijing, China","institution_ids":["https://openalex.org/I153473198","https://openalex.org/I4392738113"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134083580","display_name":"Qinghe Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I153473198","display_name":"North China Electric Power University","ror":"https://ror.org/04qr5t414","country_code":"CN","type":"education","lineage":["https://openalex.org/I153473198"]},{"id":"https://openalex.org/I4392738113","display_name":"China Electric Power Research Institute","ror":"https://ror.org/05ehpzy81","country_code":null,"type":"facility","lineage":["https://openalex.org/I17442442","https://openalex.org/I4392738113"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qinghe Ye","raw_affiliation_strings":["China Electric Power Research Institute, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"China Electric Power Research Institute, Beijing, China","institution_ids":["https://openalex.org/I153473198","https://openalex.org/I4392738113"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134005491","display_name":"Longchuan Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longchuan Yan","raw_affiliation_strings":["State Grid Information Telecommunication Branch, State Grid, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Grid Information Telecommunication Branch, State Grid, Beijing, China","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134056775","display_name":"Jianing Niu","orcid":null},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianing Niu","raw_affiliation_strings":["State Grid Information Telecommunication Branch, State Grid, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Grid Information Telecommunication Branch, State Grid, Beijing, China","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056562866","display_name":"J J Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junming Liu","raw_affiliation_strings":["Supercomputing Center and IT Center, Hangzhou Dianzi University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Supercomputing Center and IT Center, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032551629","display_name":"Lingjia Lao","orcid":null},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingjia Lao","raw_affiliation_strings":["School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62178743,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":"2","first_page":"1202","last_page":"1216"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.375900000333786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.375900000333786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.15150000154972076,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.12200000137090683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6074000000953674},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6061999797821045},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5296000242233276},{"id":"https://openalex.org/keywords/processor-scheduling","display_name":"Processor scheduling","score":0.4066999852657318},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.3125}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8578000068664551},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6074000000953674},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6061999797821045},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5296000242233276},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4228000044822693},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.4066999852657318},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.39340001344680786},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.3125},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30149999260902405},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.27900001406669617},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcc.2026.3685862","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tcc.2026.3685862","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/tcc.2026.3685862","is_oa":true,"landing_page_url":"https://doi.org/10.1109/tcc.2026.3685862","pdf_url":null,"source":{"id":"https://openalex.org/S2492498579","display_name":"IEEE Transactions on Cloud Computing","issn_l":"2168-7161","issn":["2168-7161","2372-0018"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cloud Computing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"When":[0],"processing":[1],"large-scale":[2],"co-located":[3,30],"inference":[4,154,228],"workloads":[5,44,47,229],"in":[6,37,101,283],"heterogeneous":[7],"GPU":[8,55,60,76,103,116,173],"clusters,":[9],"existing":[10],"cluster":[11],"scheduling":[12,110,140,200,277],"mechanisms":[13],"often":[14],"increase":[15],"the":[16,22,83,188],"job":[17,88,267],"makespan":[18,268],"due":[19],"to":[20,51,75,115,217,270,287],"neglecting":[21],"mutual":[23],"performance":[24],"interference":[25],"and":[26,59,70,137,165,183,194,214,221,234,250,274],"resource":[27,212],"contention":[28],"for":[29,81,98,119,207],"tasks.":[31],"This":[32],"deficiency":[33],"is":[34,79],"dramatically":[35],"amplified":[36],"typical":[38],"neural":[39],"networks":[40],"based":[41],"deep":[42],"learning":[43],"because":[45],"these":[46],"are":[48],"highly":[49],"sensitive":[50],"hardware":[52,77,117,179],"configurations":[53],"like":[54],"memory":[56],"capacity,":[57],"bandwidth":[58],"core":[61,130],"frequency.":[62],"Therefore,":[63],"a":[64,108,132,138,203,211,243],"hardware-performance-aware":[65],"scheduler":[66],"capable":[67],"of":[68,87,128,247,253,289],"adaptively":[69],"dynamically":[71,113],"dispatching":[72],"tasks":[73],"according":[74],"characteristics":[78,118],"crucial":[80],"reducing":[82],"overall":[84],"completion":[85],"time":[86,279],"queues.":[89],"To":[90],"address":[91],"this":[92],"issue,":[93],"we":[94],"propose":[95],"Hardware-Aware":[96],"Scheduling":[97],"Inference":[99],"Tasks":[100],"Heterogeneous":[102],"Clusters":[104],"(<italic":[105],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[106,125,240,255,262],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">HASE</i>),":[107],"novel":[109],"framework":[111],"that":[112,145,238],"adapts":[114],"real-time":[120,184],"optimal":[121],"task":[122,208],"placement.":[123],"<italic":[124,239,261],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">HASE</i>":[126,241,263],"consists":[127],"two":[129],"components:":[131],"kernel-level":[133,244],"latency":[134],"prediction":[135,245],"model":[136],"hybrid":[139,199],"strategy.":[141],"Unlike":[142],"conventional":[143],"approaches":[144],"rely":[146],"on":[147,226],"coarse-grained":[148],"model-level":[149,251],"features,":[150],"our":[151],"predictor":[152,189],"decomposes":[153],"models":[155],"into":[156],"fine-grained":[157],"computational":[158],"kernels":[159],"using":[160],"ONNX":[161],"Runtime":[162],"graph":[163],"optimization,":[164],"predicts":[166],"individual":[167],"kernel":[168],"execution":[169],"times":[170],"under":[171,280],"varying":[172],"load":[174,196],"conditions.":[175],"By":[176],"integrating":[177],"static":[178],"specifications,":[180],"dynamic":[181],"microbenchmarks,":[182],"DCGM":[185],"profiling":[186],"metrics,":[187],"captures":[190],"both":[191],"operator-level":[192],"heterogeneity":[193],"background":[195],"interference.":[197],"The":[198],"strategy":[201],"combines":[202],"two-stage":[204],"greedy":[205],"search":[206],"placement":[209],"with":[210,285],"reservation":[213],"backfilling":[215],"mechanism":[216],"balance":[218],"immediate":[219],"optimization":[220],"long-term":[222],"fairness.":[223],"Experimental":[224],"results":[225],"CNN-based":[227],"(YOLO,":[230],"ResNet,":[231],"VGG,":[232],"DenseNet,":[233],"MobileNet":[235],"series)":[236],"demonstrate":[237],"achieves":[242],"accuracy":[246,252],"8.2%":[248],"MAPE":[249],"<inline-formula":[254],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[256],"notation=\"LaTeX\">$R^{2}$</tex-math></inline-formula>":[257],"=":[258],"0.91.":[259],"Moreover,":[260],"reduces":[264],"51%":[265],"total":[266],"compared":[269],"traditional":[271],"round-robin":[272],"scheduling,":[273],"maintains":[275],"per-task":[276],"decision":[278],"one":[281],"second":[282],"clusters":[284],"up":[286],"hundreds":[288],"GPUs.":[290]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-21T00:00:00"}
