{"id":"https://openalex.org/W4416963663","doi":"https://doi.org/10.1109/islped65674.2025.11261793","title":"ECLIP: Energy-efficient and Practical Co-Location of ML Inference on Spatially Partitioned GPUs","display_name":"ECLIP: Energy-efficient and Practical Co-Location of ML Inference on Spatially Partitioned GPUs","publication_year":2025,"publication_date":"2025-08-06","ids":{"openalex":"https://openalex.org/W4416963663","doi":"https://doi.org/10.1109/islped65674.2025.11261793"},"language":null,"primary_location":{"id":"doi:10.1109/islped65674.2025.11261793","is_oa":false,"landing_page_url":"https://doi.org/10.1109/islped65674.2025.11261793","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Symposium on Low Power Electronics and Design (ISLPED)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ryan Quach","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Quach","raw_affiliation_strings":["University of California,Riverside"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California,Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100783325","display_name":"Yidi Wang","orcid":"https://orcid.org/0000-0001-6122-9552"},"institutions":[{"id":"https://openalex.org/I16269868","display_name":"Santa Clara University","ror":"https://ror.org/03ypqe447","country_code":"US","type":"education","lineage":["https://openalex.org/I16269868"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yidi Wang","raw_affiliation_strings":["Santa Clara University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Santa Clara University","institution_ids":["https://openalex.org/I16269868"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004077070","display_name":"Ali Jahanshahi","orcid":"https://orcid.org/0000-0002-4301-7588"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Jahanshahi","raw_affiliation_strings":["University of California,Riverside"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California,Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000712719","display_name":"Daniel Wong","orcid":"https://orcid.org/0000-0002-5376-7868"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Wong","raw_affiliation_strings":["University of California,Riverside"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California,Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081504885","display_name":"Hyoseung Kim","orcid":"https://orcid.org/0000-0002-8553-732X"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hyoseung Kim","raw_affiliation_strings":["University of California,Riverside"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of California,Riverside","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37824098,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5113999843597412,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5113999843597412,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.2287999987602234,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.04569999873638153,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.8472999930381775},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.6474000215530396},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.6090999841690063},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5273000001907349},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.474700003862381},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.44749999046325684},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.42260000109672546},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.41179999709129333}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.8472999930381775},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7245000004768372},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.6474000215530396},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.6090999841690063},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5273000001907349},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.474700003862381},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.42260000109672546},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.41179999709129333},{"id":"https://openalex.org/C2777480716","wikidata":"https://www.wikidata.org/wiki/Q23582796","display_name":"Resource consumption","level":2,"score":0.4106000065803528},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.39410001039505005},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.36469998955726624},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.326200008392334},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.3149999976158142},{"id":"https://openalex.org/C2988147884","wikidata":"https://www.wikidata.org/wiki/Q5377024","display_name":"Energy expenditure","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2973000109195709},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2870999872684479},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.28540000319480896},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.28349998593330383},{"id":"https://openalex.org/C14961307","wikidata":"https://www.wikidata.org/wiki/Q5377176","display_name":"Energy minimization","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C2985973956","wikidata":"https://www.wikidata.org/wiki/Q1617745","display_name":"High energy","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26669999957084656},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/islped65674.2025.11261793","is_oa":false,"landing_page_url":"https://doi.org/10.1109/islped65674.2025.11261793","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/ACM International Symposium on Low Power Electronics and Design (ISLPED)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2323693848","https://openalex.org/W2522765117","https://openalex.org/W2535693830","https://openalex.org/W2549139847","https://openalex.org/W2605178034","https://openalex.org/W2788386530","https://openalex.org/W2883780447","https://openalex.org/W2891285910","https://openalex.org/W2979642495","https://openalex.org/W3014810041","https://openalex.org/W3084790829","https://openalex.org/W3097411828","https://openalex.org/W3186368531","https://openalex.org/W3206857466","https://openalex.org/W3217391893","https://openalex.org/W4200404225","https://openalex.org/W4243035950","https://openalex.org/W4280528304","https://openalex.org/W4293025059","https://openalex.org/W4312198983","https://openalex.org/W4360831842","https://openalex.org/W4381885923","https://openalex.org/W4399117317","https://openalex.org/W4399147211","https://openalex.org/W4402044606"],"related_works":[],"abstract_inverted_index":{"As":[0],"AI":[1],"inference":[2,16,89],"becomes":[3],"mainstream,":[4],"research":[5],"has":[6],"begun":[7],"to":[8,80,107,124,129],"focus":[9],"on":[10],"improving":[11],"the":[12,44],"energy":[13,36,64,130],"consumption":[14],"of":[15,98,109,121],"servers.":[17],"Inference":[18],"kernels":[19,110],"commonly":[20],"underutilize":[21],"a":[22,78],"GPU\u2019s":[23],"compute":[24],"resources":[25],"and":[26,35,42,102,126],"waste":[27,62],"power":[28],"from":[29],"idling":[30],"components.":[31],"To":[32],"improve":[33],"utilization":[34],"efficiency,":[37],"multiple":[38],"models":[39],"can":[40,61],"co-locate":[41],"share":[43],"GPU.":[45],"However,":[46],"typical":[47],"GPU":[48],"spatial":[49,58],"partitioning":[50,86],"techniques":[51],"often":[52],"experience":[53],"significant":[54],"overheads":[55,67,94],"when":[56],"reconfiguring":[57],"partitions,":[59],"which":[60],"additional":[63],"through":[65,111],"repartitioning":[66,93],"or":[68],"non-optimal":[69],"partition":[70],"configurations.":[71],"In":[72],"this":[73],"paper,":[74],"we":[75],"present":[76],"ECLIP,":[77],"framework":[79],"enable":[81],"low-overhead":[82],"energy-efficient":[83],"kernel-wise":[84],"resource":[85,113],"between":[87],"co-located":[88],"kernels.":[90],"ECLIP":[91,117],"minimizes":[92],"by":[95],"pre-allocating":[96],"pools":[97],"CU":[99,105],"masked":[100],"streams":[101],"assigns":[103],"optimal":[104],"assignments":[106],"groups":[108],"our":[112],"allocation":[114],"optimizer.":[115],"Overall,":[116],"achieves":[118],"an":[119],"average":[120],"13%":[122],"improvement":[123,128],"throughput":[125],"25%":[127],"efficiency.":[131]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-03T00:00:00"}
