{"id":"https://openalex.org/W4409131852","doi":"https://doi.org/10.1109/hpec62836.2024.10938426","title":"LLM Inference Serving: Survey of Recent Advances and Opportunities","display_name":"LLM Inference Serving: Survey of Recent Advances and Opportunities","publication_year":2024,"publication_date":"2024-09-23","ids":{"openalex":"https://openalex.org/W4409131852","doi":"https://doi.org/10.1109/hpec62836.2024.10938426"},"language":"en","primary_location":{"id":"doi:10.1109/hpec62836.2024.10938426","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938426","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100733204","display_name":"Baolin Li","orcid":"https://orcid.org/0000-0001-9778-1023"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Baolin Li","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102690379","display_name":"Yankai Jiang","orcid":"https://orcid.org/0009-0006-9968-7560"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Yankai Jiang","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043450560","display_name":"Vijay Gadepally","orcid":"https://orcid.org/0000-0002-4598-2808"},"institutions":[{"id":"https://openalex.org/I4210122954","display_name":"MIT Lincoln Laboratory","ror":"https://ror.org/022z6jk58","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210122954","https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vijay Gadepally","raw_affiliation_strings":["MIT Lincoln Laboratory"],"affiliations":[{"raw_affiliation_string":"MIT Lincoln Laboratory","institution_ids":["https://openalex.org/I4210122954"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074406596","display_name":"Devesh Tiwari","orcid":null},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Devesh Tiwari","raw_affiliation_strings":["Northeastern University"],"affiliations":[{"raw_affiliation_string":"Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100733204"],"corresponding_institution_ids":["https://openalex.org/I87182695"],"apc_list":null,"apc_paid":null,"fwci":9.4712,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.98247658,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.6815000176429749,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.6815000176429749,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.603458821773529},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5967186689376831},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.43215325474739075},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18159076571464539}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.603458821773529},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5967186689376831},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.43215325474739075},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18159076571464539}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpec62836.2024.10938426","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpec62836.2024.10938426","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE High Performance Extreme Computing Conference (HPEC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4395112660","https://openalex.org/W4400409890","https://openalex.org/W6777615688","https://openalex.org/W6794173217","https://openalex.org/W6846659131","https://openalex.org/W6850927664","https://openalex.org/W6850960556","https://openalex.org/W6852670370","https://openalex.org/W6853325866","https://openalex.org/W6853336479","https://openalex.org/W6857061474","https://openalex.org/W6858205958","https://openalex.org/W6858336856","https://openalex.org/W6858460081","https://openalex.org/W6859180420","https://openalex.org/W6860155063","https://openalex.org/W6860312956","https://openalex.org/W6860329836","https://openalex.org/W6860434696","https://openalex.org/W6861013483","https://openalex.org/W6861206314","https://openalex.org/W6861248927","https://openalex.org/W6861451979","https://openalex.org/W6861495178","https://openalex.org/W6861569072","https://openalex.org/W6861670789","https://openalex.org/W6861839547","https://openalex.org/W6862025885","https://openalex.org/W6862187050","https://openalex.org/W6862447213","https://openalex.org/W6862520094","https://openalex.org/W6862755109","https://openalex.org/W6864822335","https://openalex.org/W6864865257","https://openalex.org/W6864894424","https://openalex.org/W6865305742","https://openalex.org/W6865318415","https://openalex.org/W6865347877","https://openalex.org/W6865365722","https://openalex.org/W6866760227","https://openalex.org/W6868260766","https://openalex.org/W6868449971","https://openalex.org/W6869066467","https://openalex.org/W6869218818","https://openalex.org/W6869219368","https://openalex.org/W6869333693","https://openalex.org/W6869398331","https://openalex.org/W6869561046","https://openalex.org/W6869820326","https://openalex.org/W6869966162","https://openalex.org/W6870125966"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"This":[0,68],"survey":[1,69],"offers":[2],"a":[3,72],"comprehensive":[4],"overview":[5],"of":[6,82],"recent":[7],"advancements":[8],"in":[9,64,86],"Large":[10],"Language":[11],"Model":[12],"(LLM)":[13],"serving":[14],"systems,":[15],"focusing":[16],"on":[17],"research":[18],"since":[19],"the":[20,35,83],"year":[21],"2023.":[22],"We":[23],"specifically":[24],"examine":[25],"system-level":[26],"enhancements":[27],"that":[28],"improve":[29],"performance":[30],"and":[31,42,49,56,61],"efficiency":[32],"without":[33],"altering":[34],"core":[36],"LLM":[37,76],"decoding":[38],"mechanisms.":[39],"By":[40],"selecting":[41],"reviewing":[43],"high-quality":[44],"papers":[45],"from":[46],"prestigious":[47],"ML":[48],"system":[50],"venues,":[51],"we":[52],"highlight":[53],"key":[54],"innovations":[55],"practical":[57],"considerations":[58],"for":[59,75],"deploying":[60],"scaling":[62],"LLMs":[63],"real-world":[65],"production":[66],"environments.":[67],"serves":[70],"as":[71],"valuable":[73],"resource":[74],"practitioners":[77],"seeking":[78],"to":[79],"stay":[80],"abreast":[81],"latest":[84],"developments":[85],"this":[87],"rapidly":[88],"evolving":[89],"field.":[90]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":15}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
