{"id":"https://openalex.org/W4360831842","doi":"https://doi.org/10.1109/hpca56546.2023.10071121","title":"KRISP: Enabling Kernel-wise RIght-sizing for Spatial Partitioned GPU Inference Servers","display_name":"KRISP: Enabling Kernel-wise RIght-sizing for Spatial Partitioned GPU Inference Servers","publication_year":2023,"publication_date":"2023-02-01","ids":{"openalex":"https://openalex.org/W4360831842","doi":"https://doi.org/10.1109/hpca56546.2023.10071121"},"language":"en","primary_location":{"id":"doi:10.1109/hpca56546.2023.10071121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10071121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011003147","display_name":"Marcus Chow","orcid":"https://orcid.org/0000-0002-2577-8914"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marcus Chow","raw_affiliation_strings":["University of California,Department of Computer Science &#x0026; Engineering,Riverside"],"affiliations":[{"raw_affiliation_string":"University of California,Department of Computer Science &#x0026; Engineering,Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004077070","display_name":"Ali Jahanshahi","orcid":"https://orcid.org/0000-0002-4301-7588"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Jahanshahi","raw_affiliation_strings":["University of California,Department of Computer Science &#x0026; Engineering,Riverside"],"affiliations":[{"raw_affiliation_string":"University of California,Department of Computer Science &#x0026; Engineering,Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000712719","display_name":"Daniel Wong","orcid":"https://orcid.org/0000-0002-5376-7868"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Wong","raw_affiliation_strings":["University of California,Department of Computer Science &#x0026; Engineering,Riverside"],"affiliations":[{"raw_affiliation_string":"University of California,Department of Computer Science &#x0026; Engineering,Riverside","institution_ids":["https://openalex.org/I103635307"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5011003147"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":2.8159,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.92287491,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"624","last_page":"637"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.8606846332550049},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8180521726608276},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.7504372000694275},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7044695615768433},{"id":"https://openalex.org/keywords/sizing","display_name":"Sizing","score":0.5092476606369019},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.4584185481071472},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4386621415615082},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34669506549835205},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1553703248500824},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08867904543876648}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.8606846332550049},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8180521726608276},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.7504372000694275},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7044695615768433},{"id":"https://openalex.org/C2777767291","wikidata":"https://www.wikidata.org/wiki/Q1080291","display_name":"Sizing","level":2,"score":0.5092476606369019},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.4584185481071472},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4386621415615082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34669506549835205},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1553703248500824},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08867904543876648},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca56546.2023.10071121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10071121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":71,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W1598866093","https://openalex.org/W1686810756","https://openalex.org/W1973573211","https://openalex.org/W1985747326","https://openalex.org/W1997025365","https://openalex.org/W2038662769","https://openalex.org/W2085434892","https://openalex.org/W2105477603","https://openalex.org/W2134807578","https://openalex.org/W2142801765","https://openalex.org/W2194775991","https://openalex.org/W2271840356","https://openalex.org/W2279098554","https://openalex.org/W2323693848","https://openalex.org/W2396363708","https://openalex.org/W2549139847","https://openalex.org/W2605251767","https://openalex.org/W2625231790","https://openalex.org/W2626985503","https://openalex.org/W2734941459","https://openalex.org/W2786171709","https://openalex.org/W2787181861","https://openalex.org/W2794670651","https://openalex.org/W2883780447","https://openalex.org/W2891285910","https://openalex.org/W2901073342","https://openalex.org/W2925744458","https://openalex.org/W2963446712","https://openalex.org/W2996428491","https://openalex.org/W3034539665","https://openalex.org/W3043433718","https://openalex.org/W3043492469","https://openalex.org/W3084790829","https://openalex.org/W3097411828","https://openalex.org/W3102510044","https://openalex.org/W3117255825","https://openalex.org/W3186368531","https://openalex.org/W3198738574","https://openalex.org/W3202465876","https://openalex.org/W3208045756","https://openalex.org/W3209166877","https://openalex.org/W4234578094","https://openalex.org/W4235551121","https://openalex.org/W4243035950","https://openalex.org/W4245867598","https://openalex.org/W4246379307","https://openalex.org/W4280528304","https://openalex.org/W4280607375","https://openalex.org/W4280633167","https://openalex.org/W4289083201","https://openalex.org/W4293025059","https://openalex.org/W4295312788","https://openalex.org/W4297808460","https://openalex.org/W6635810480","https://openalex.org/W6637373629","https://openalex.org/W6694517276","https://openalex.org/W6695314431","https://openalex.org/W6730956707","https://openalex.org/W6738174876","https://openalex.org/W6747759466","https://openalex.org/W6757364461","https://openalex.org/W6766978945","https://openalex.org/W6768021236","https://openalex.org/W6769424276","https://openalex.org/W6796196852","https://openalex.org/W6798686915","https://openalex.org/W6801411073","https://openalex.org/W6802529931","https://openalex.org/W6809979497","https://openalex.org/W7067822191"],"related_works":["https://openalex.org/W2375311683","https://openalex.org/W2366062860","https://openalex.org/W2373777250","https://openalex.org/W2353956655","https://openalex.org/W2020653254","https://openalex.org/W2010454064","https://openalex.org/W2352072014","https://openalex.org/W217279133","https://openalex.org/W2393487946","https://openalex.org/W2931688134"],"abstract_inverted_index":{"Machine":[0],"learning":[1],"(ML)":[2],"inference":[3,14,64,98,135,142,155,164],"workloads":[4,15],"present":[5],"significantly":[6],"different":[7],"challenges":[8],"than":[9],"ML":[10],"training":[11],"workloads.":[12],"Typically,":[13],"are":[16],"shorter":[17],"running":[18,134],"and":[19,160],"under-utilize":[20],"GPU":[21,45,47,106],"resources.":[22,79],"To":[23],"overcome":[24],"this,":[25],"co-locating":[26],"multiple":[27],"instances":[28],"of":[29,39,93,114,120,132],"a":[30,73,129],"model":[31],"has":[32],"been":[33],"proposed":[34],"to":[35,77,88,110,138],"improve":[36],"the":[37,44,118],"utilization":[38],"GPUs.":[40],"Co-located":[41],"models":[42,136],"share":[43],"through":[46],"spatial":[48,115],"partitioning":[49],"facilities,":[50],"such":[51],"as":[52],"Nvidia\u2019s":[53],"MPS,":[54],"MIG,":[55],"or":[56],"AMD\u2019s":[57],"CU":[58],"Masking":[59],"API.":[60],"Existing":[61],"spatially":[62,140],"partitioned":[63,141],"servers":[65],"create":[66],"model-wise":[67,83],"partitions":[68,116],"by":[69,148,165],"\"right-sizing\"":[70],"based":[71],"on":[72],"model\u2019s":[74],"latency":[75],"tolerance":[76,92],"restricting":[78],"We":[80,123],"show":[81],"that":[82,125],"right-sizing":[84,113],"is":[85],"under-utilized":[86],"due":[87],"varying":[89],"resource":[90],"restriction":[91],"individual":[94,121],"kernels":[95],"within":[96],"an":[97,153],"pass.We":[99],"propose":[100],"Kernel-wise":[101],"Right-sizing":[102],"for":[103],"Spatial":[104],"Partitioned":[105],"Inference":[107],"Servers":[108],"(KRISP)":[109],"enable":[111],"kernel-wise":[112],"at":[117],"granularity":[119],"kernels.":[122],"demonstrate":[124],"KRISP":[126,144],"can":[127],"support":[128],"greater":[130],"level":[131],"concurrently":[133],"compared":[137,151],"existing":[139],"servers.":[143],"improves":[145],"overall":[146],"throughput":[147],"2x":[149],"when":[150],"with":[152],"isolated":[154],"(1.22x":[156],"vs":[157],"prior":[158],"works)":[159],"reduce":[161],"energy":[162],"per":[163],"33%.":[166]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
