{"id":"https://openalex.org/W4368755837","doi":"https://doi.org/10.1145/3588195.3592997","title":"Kairos: Building Cost-Efficient Machine Learning Inference Systems with Heterogeneous Cloud Resources","display_name":"Kairos: Building Cost-Efficient Machine Learning Inference Systems with Heterogeneous Cloud Resources","publication_year":2023,"publication_date":"2023-08-07","ids":{"openalex":"https://openalex.org/W4368755837","doi":"https://doi.org/10.1145/3588195.3592997"},"language":"en","primary_location":{"id":"doi:10.1145/3588195.3592997","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3588195.3592997","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100733204","display_name":"Baolin Li","orcid":"https://orcid.org/0000-0001-9778-1023"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Baolin Li","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108029794","display_name":"Siddharth Samsi","orcid":"https://orcid.org/0009-0005-4937-6054"},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddharth Samsi","raw_affiliation_strings":["MIT Lincoln Laboratory, Lexington, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory, Lexington, MA, USA","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043450560","display_name":"Vijay Gadepally","orcid":"https://orcid.org/0000-0002-4598-2808"},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vijay Gadepally","raw_affiliation_strings":["MIT Lincoln Laboratory, Lexington, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory, Lexington, MA, USA","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074406596","display_name":"Devesh Tiwari","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Devesh Tiwari","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100733204"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":null,"apc_paid":null,"fwci":4.983,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.95423439,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8110949993133545},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.7307106256484985},{"id":"https://openalex.org/keywords/kairos","display_name":"Kairos","score":0.647178053855896},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6463054418563843},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.6276309490203857},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6198214292526245},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.5957261323928833},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.5800010561943054},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5410110354423523},{"id":"https://openalex.org/keywords/revenue","display_name":"Revenue","score":0.513230562210083},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5004978179931641},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.41824042797088623},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.301261842250824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30033302307128906},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2970987856388092},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.257434606552124}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8110949993133545},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.7307106256484985},{"id":"https://openalex.org/C2779768813","wikidata":"https://www.wikidata.org/wiki/Q845756","display_name":"Kairos","level":2,"score":0.647178053855896},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6463054418563843},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.6276309490203857},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6198214292526245},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.5957261323928833},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.5800010561943054},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5410110354423523},{"id":"https://openalex.org/C195487862","wikidata":"https://www.wikidata.org/wiki/Q850210","display_name":"Revenue","level":2,"score":0.513230562210083},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5004978179931641},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.41824042797088623},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.301261842250824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30033302307128906},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2970987856388092},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.257434606552124},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C136264566","wikidata":"https://www.wikidata.org/wiki/Q159810","display_name":"Economy","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3588195.3592997","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3588195.3592997","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd International Symposium on High-Performance Parallel and Distributed Computing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6100000143051147,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[{"id":"https://openalex.org/G4728703731","display_name":null,"funder_award_id":"A8750-19-2-1000","funder_id":"https://openalex.org/F4320332467","funder_display_name":"U.S. Air Force"},{"id":"https://openalex.org/G5174054272","display_name":null,"funder_award_id":"FA8702-15-D-0001","funder_id":"https://openalex.org/F4320332467","funder_display_name":"U.S. Air Force"},{"id":"https://openalex.org/G6856159117","display_name":null,"funder_award_id":"Contract No. FA8702-15-D-0001","funder_id":"https://openalex.org/F4320332467","funder_display_name":"U.S. Air Force"},{"id":"https://openalex.org/G7647474342","display_name":null,"funder_award_id":"FA8750-19-2-1000","funder_id":"https://openalex.org/F4320332467","funder_display_name":"U.S. Air Force"},{"id":"https://openalex.org/G865730642","display_name":null,"funder_award_id":"Air Force Contract No. FA8702-15-D-0001","funder_id":"https://openalex.org/F4320332467","funder_display_name":"U.S. Air Force"}],"funders":[{"id":"https://openalex.org/F4320332467","display_name":"U.S. Air Force","ror":"https://ror.org/006gmme17"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W1574447377","https://openalex.org/W2038249652","https://openalex.org/W2046825771","https://openalex.org/W2102166244","https://openalex.org/W2104144451","https://openalex.org/W2139688603","https://openalex.org/W2149294210","https://openalex.org/W2150139096","https://openalex.org/W2156077332","https://openalex.org/W2162656786","https://openalex.org/W2261536765","https://openalex.org/W2295575944","https://openalex.org/W2335814492","https://openalex.org/W2475334473","https://openalex.org/W2522214268","https://openalex.org/W2528800812","https://openalex.org/W2557169239","https://openalex.org/W2565600385","https://openalex.org/W2605350416","https://openalex.org/W2612225380","https://openalex.org/W2734941459","https://openalex.org/W2757024591","https://openalex.org/W2760528149","https://openalex.org/W2768993348","https://openalex.org/W2794670651","https://openalex.org/W2796889653","https://openalex.org/W2885579974","https://openalex.org/W2910482069","https://openalex.org/W2919064223","https://openalex.org/W2928897890","https://openalex.org/W2931122162","https://openalex.org/W2952777853","https://openalex.org/W2956461999","https://openalex.org/W2962745591","https://openalex.org/W2971412597","https://openalex.org/W2973172293","https://openalex.org/W2990714382","https://openalex.org/W2994659735","https://openalex.org/W3014810041","https://openalex.org/W3016842236","https://openalex.org/W3017091196","https://openalex.org/W3021746702","https://openalex.org/W3033527233","https://openalex.org/W3035192036","https://openalex.org/W3043433718","https://openalex.org/W3043571714","https://openalex.org/W3080999573","https://openalex.org/W3095488153","https://openalex.org/W3097108668","https://openalex.org/W3105381414","https://openalex.org/W3109610142","https://openalex.org/W3130104841","https://openalex.org/W3130281579","https://openalex.org/W3130689885","https://openalex.org/W3156127671","https://openalex.org/W3158444059","https://openalex.org/W3196199836","https://openalex.org/W3203600045","https://openalex.org/W3208777667","https://openalex.org/W3209166877","https://openalex.org/W3210617645","https://openalex.org/W3210776666","https://openalex.org/W4210580908","https://openalex.org/W4226064176","https://openalex.org/W4235679005","https://openalex.org/W4236491543","https://openalex.org/W4253824360","https://openalex.org/W4280507006","https://openalex.org/W4300633510","https://openalex.org/W4304192541","https://openalex.org/W6638636849","https://openalex.org/W6691209370","https://openalex.org/W6778330239","https://openalex.org/W6798686915","https://openalex.org/W6966713062"],"related_works":["https://openalex.org/W4306655454","https://openalex.org/W346698204","https://openalex.org/W3156293767","https://openalex.org/W2483128759","https://openalex.org/W2149333287","https://openalex.org/W2430062301","https://openalex.org/W2078379271","https://openalex.org/W2043460294","https://openalex.org/W4210712718","https://openalex.org/W1905398909"],"abstract_inverted_index":{"Online":[0],"inference":[1,77],"is":[2],"becoming":[3],"a":[4,40,54,65],"key":[5],"service":[6],"product":[7],"for":[8],"many":[9],"businesses,":[10],"deployed":[11],"in":[12],"cloud":[13],"platforms":[14],"to":[15,26,63,95,110,119],"meet":[16],"customer":[17],"demands.":[18],"Despite":[19],"their":[20,121],"revenue-generation":[21],"capability,":[22],"these":[23],"services":[24],"need":[25],"operate":[27],"under":[28],"tight":[29],"Quality-of-Service":[30],"(QoS)":[31],"and":[32,53,59,75,104],"cost":[33,55],"budget":[34],"constraints.":[35],"This":[36],"paper":[37],"introduces":[38],"KAIROS,":[39],"novel":[41,61],"runtime":[42],"framework":[43],"that":[44,91],"maximizes":[45],"the":[46,97,116],"query":[47],"throughput":[48,98],"while":[49],"meeting":[50],"QoS":[51],"target":[52],"budget.":[56],"KAIROS":[57,92],"designs":[58],"implements":[60],"techniques":[62],"build":[64],"pool":[66],"of":[67,99,115],"heterogeneous":[68],"compute":[69],"hardware":[70],"without":[71],"online":[72],"exploration":[73,122],"overhead,":[74],"distribute":[76],"queries":[78],"optimally":[79],"at":[80],"runtime.":[81],"Our":[82],"evaluation":[83],"using":[84],"industry-grade":[85],"machine":[86],"learning":[87],"(ML)":[88],"models":[89],"shows":[90],"yields":[93],"up":[94,109],"2x":[96],"an":[100],"optimal":[101],"homogeneous":[102],"solution,":[103],"outperforms":[105],"state-of-the-art":[106],"schemes":[107,118],"by":[108],"70%,":[111],"despite":[112],"advantageous":[113],"implementations":[114],"competing":[117],"ignore":[120],"overhead.":[123]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
