{"id":"https://openalex.org/W7158152164","doi":"https://doi.org/10.1145/3805621.3807633","title":"Sampling Where It Matters: Predicting LLM Serving Performance","display_name":"Sampling Where It Matters: Predicting LLM Serving Performance","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7158152164","doi":"https://doi.org/10.1145/3805621.3807633"},"language":null,"primary_location":{"id":"doi:10.1145/3805621.3807633","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807633","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805621.3807633","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134828114","display_name":"Emile Aydar","orcid":"https://orcid.org/0009-0008-9653-1848"},"institutions":[{"id":"https://openalex.org/I4210145784","display_name":"IBM Research - Ireland","ror":"https://ror.org/04jnxr720","country_code":"IE","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145784"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Emile Aydar","raw_affiliation_strings":["IBM Research Europe, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0009-0008-9653-1848","affiliations":[{"raw_affiliation_string":"IBM Research Europe, Dublin, Ireland","institution_ids":["https://openalex.org/I4210145784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031211716","display_name":"Christian Pinto","orcid":"https://orcid.org/0000-0001-7060-2742"},"institutions":[{"id":"https://openalex.org/I4210145784","display_name":"IBM Research - Ireland","ror":"https://ror.org/04jnxr720","country_code":"IE","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145784"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Christian Pinto","raw_affiliation_strings":["IBM Research Europe, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0000-0001-7060-2742","affiliations":[{"raw_affiliation_string":"IBM Research Europe, Dublin, Ireland","institution_ids":["https://openalex.org/I4210145784"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036172590","display_name":"Srikumar Venugopal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145784","display_name":"IBM Research - Ireland","ror":"https://ror.org/04jnxr720","country_code":"IE","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210145784"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Srikumar Venugopal","raw_affiliation_strings":["IBM Research Europe, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0000-0001-8320-724X","affiliations":[{"raw_affiliation_string":"IBM Research Europe, Dublin, Ireland","institution_ids":["https://openalex.org/I4210145784"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101739299","display_name":"Dimitris Chatzopoulos","orcid":"https://orcid.org/0000-0002-4765-5085"},"institutions":[{"id":"https://openalex.org/I100930933","display_name":"University College Dublin","ror":"https://ror.org/05m7pjf47","country_code":"IE","type":"education","lineage":["https://openalex.org/I100930933"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Dimitris Chatzopoulos","raw_affiliation_strings":["University College Dublin, Dublin, Ireland"],"raw_orcid":"https://orcid.org/0000-0002-4765-5085","affiliations":[{"raw_affiliation_string":"University College Dublin, Dublin, Ireland","institution_ids":["https://openalex.org/I100930933"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5134828114"],"corresponding_institution_ids":["https://openalex.org/I4210145784"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93316129,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.2529999911785126,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.2529999911785126,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08489999920129776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.08049999922513962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.4837999939918518},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.4205999970436096},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.4058000147342682},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.37689998745918274},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.35510000586509705},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.3467999994754791}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6499999761581421},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.4837999939918518},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.4205999970436096},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.4058000147342682},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.37689998745918274},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.35510000586509705},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.3467999994754791},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32100000977516174},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3084000051021576},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2897999882698059},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.28209999203681946},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26510000228881836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805621.3807633","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807633","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805621.3807633","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805621.3807633","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth European Workshop on Machine Learning and Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6642209651","display_name":"6G-DALI: 6G DAta and ML operations automation via an end-to-end AI framework","funder_award_id":"101192750","funder_id":"https://openalex.org/F4320338444","funder_display_name":"HORIZON EUROPE Digital, Industry and Space"}],"funders":[{"id":"https://openalex.org/F4320338444","display_name":"HORIZON EUROPE Digital, Industry and Space","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1510052597","https://openalex.org/W1967692477","https://openalex.org/W1969545943","https://openalex.org/W1973333099","https://openalex.org/W2053934160","https://openalex.org/W2080021732","https://openalex.org/W2098720801","https://openalex.org/W2115305054","https://openalex.org/W2151025666","https://openalex.org/W2165698076","https://openalex.org/W2171993497","https://openalex.org/W2192203593","https://openalex.org/W2570764145","https://openalex.org/W2767224631","https://openalex.org/W2802235618","https://openalex.org/W2889745160","https://openalex.org/W2937450972","https://openalex.org/W2953843994","https://openalex.org/W2962755824","https://openalex.org/W3042713993","https://openalex.org/W3135013702","https://openalex.org/W3173017111","https://openalex.org/W3202428668","https://openalex.org/W3210776666","https://openalex.org/W3214421366","https://openalex.org/W4235606958","https://openalex.org/W4237151101","https://openalex.org/W4247680473","https://openalex.org/W4285043533","https://openalex.org/W4300580367","https://openalex.org/W4361982189","https://openalex.org/W4381327277","https://openalex.org/W4385877054","https://openalex.org/W4387321091","https://openalex.org/W4394998727","https://openalex.org/W4404199498","https://openalex.org/W4405755255","https://openalex.org/W4409248734","https://openalex.org/W4412095808","https://openalex.org/W4412915583","https://openalex.org/W4415763855","https://openalex.org/W4415957649"],"related_works":[],"abstract_inverted_index":{"Characterizing":[0],"Large":[1],"Language":[2],"Model":[3],"(LLM)":[4],"serving":[5,78],"performance":[6,40],"is":[7,32,84],"a":[8,12,58],"combinatorial":[9],"problem":[10],"where":[11],"suboptimal":[13],"choice":[14],"wastes":[15],"profiling":[16,129],"budget:":[17],"every":[18],"change":[19],"in":[20],"model,":[21],"hardware,":[22],"or":[23,48],"software":[24],"version":[25],"requires":[26],"fresh":[27],"profiling,":[28],"yet":[29],"exhaustive":[30],"benchmarking":[31],"infeasible.":[33],"Existing":[34],"approaches":[35],"-":[36,42],"simulators":[37],"and":[38,95,110],"static":[39],"estimators":[41],"lose":[43],"fidelity":[44],"on":[45,92],"novel":[46],"architectures":[47],"target":[49],"only":[50,86],"optima.":[51],"We":[52],"introduce":[53],"Predictive":[54],"Kernel":[55],"Herding":[56],"(PKH),":[57],"sampler":[59,87],"which":[60],"reformulates":[61],"Random":[62],"Forest":[63],"leaf":[64],"co-occurrence":[65],"as":[66],"linear-time":[67],"histogram":[68],"matching,":[69],"replacing":[70],"O(N2)":[71],"kernel":[72],"comparisons.":[73],"On":[74],"four":[75],"real-world":[76],"LLM":[77],"traces":[79],"spanning":[80],"3,000+":[81],"configurations,":[82],"PKH":[83,103],"the":[85,99,132],"that":[88],"delivers":[89],"top-ranked":[90],"accuracy":[91,123],"both":[93],"throughput":[94,106],"latency":[96],"predictions,":[97],"dominating":[98],"cost-accuracy":[100],"Pareto":[101],"frontier.":[102],"predicts":[104],"output":[105],"within":[107,117],"10%":[108],"MAPE":[109],"mean":[111],"Time":[112],"to":[113,126],"First":[114],"Token":[115],"(TTFT)":[116],"20%":[118],"MAPE,":[119],"reaching":[120],"practically":[121],"useful":[122],"with":[124],"up":[125],"1.6\u00d7":[127],"lower":[128],"time":[130],"than":[131],"next-best":[133],"method":[134],"at":[135],"equivalent":[136],"error.":[137]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-30T00:00:00"}
