{"id":"https://openalex.org/W4412915583","doi":"https://doi.org/10.1145/3754448","title":"Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems","display_name":"Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems","publication_year":2025,"publication_date":"2025-07-25","ids":{"openalex":"https://openalex.org/W4412915583","doi":"https://doi.org/10.1145/3754448"},"language":"en","primary_location":{"id":"doi:10.1145/3754448","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754448","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"review","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3754448","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015552951","display_name":"Xupeng Miao","orcid":"https://orcid.org/0000-0002-9371-8358"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xupeng Miao","raw_affiliation_strings":["Purdue University","Purdue University, West Lafayette, United States"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"Purdue University, West Lafayette, United States","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073183750","display_name":"Gabriele Oliaro","orcid":"https://orcid.org/0000-0001-5406-0736"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gabriele Oliaro","raw_affiliation_strings":["Carnegie Mellon University","Carnegie Mellon University, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072348433","display_name":"Zhihao Zhang","orcid":"https://orcid.org/0009-0002-8409-2717"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhihao Zhang","raw_affiliation_strings":["Carnegie Mellon University","Carnegie Mellon University, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040267796","display_name":"Xinhao Cheng","orcid":"https://orcid.org/0009-0009-3375-497X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinhao Cheng","raw_affiliation_strings":["Carnegie Mellon University","Carnegie Mellon University, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101264780","display_name":"Hongyi Jin","orcid":"https://orcid.org/0000-0001-6894-6554"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hongyi Jin","raw_affiliation_strings":["Carnegie Mellon University","Carnegie Mellon University, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023529355","display_name":"Tianqi Chen","orcid":"https://orcid.org/0000-0002-5744-3940"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianqi Chen","raw_affiliation_strings":["Carnegie Mellon University","Carnegie Mellon University, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062437461","display_name":"Zhihao Jia","orcid":"https://orcid.org/0000-0002-1270-5185"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhihao Jia","raw_affiliation_strings":["Carnegie Mellon University","Carnegie Mellon University, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, United States","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5015552951"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":31.0051,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.99580527,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"58","issue":"1","first_page":"1","last_page":"37"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8885490894317627},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6322100758552551},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4473499655723572},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.3513999581336975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31740105152130127}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8885490894317627},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6322100758552551},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4473499655723572},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3513999581336975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31740105152130127}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3754448","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754448","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3754448","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3754448","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":120,"referenced_works":["https://openalex.org/W2056999868","https://openalex.org/W2158992088","https://openalex.org/W2293634267","https://openalex.org/W2739738036","https://openalex.org/W2788193959","https://openalex.org/W2899971035","https://openalex.org/W2915716523","https://openalex.org/W2924425593","https://openalex.org/W2954698171","https://openalex.org/W2962677625","https://openalex.org/W2962969034","https://openalex.org/W2963434219","https://openalex.org/W2963951265","https://openalex.org/W2965046076","https://openalex.org/W2969515962","https://openalex.org/W2970454332","https://openalex.org/W2970777192","https://openalex.org/W2972087877","https://openalex.org/W2981758446","https://openalex.org/W2988975212","https://openalex.org/W3004495293","https://openalex.org/W3008374555","https://openalex.org/W3035030897","https://openalex.org/W3035038672","https://openalex.org/W3037847693","https://openalex.org/W3043571714","https://openalex.org/W3085139254","https://openalex.org/W3101163004","https://openalex.org/W3105966348","https://openalex.org/W3130689885","https://openalex.org/W3130716829","https://openalex.org/W3131922516","https://openalex.org/W3137147200","https://openalex.org/W3154922002","https://openalex.org/W3157506437","https://openalex.org/W3170113752","https://openalex.org/W3170796112","https://openalex.org/W3171750540","https://openalex.org/W3174339925","https://openalex.org/W3177172118","https://openalex.org/W3177318507","https://openalex.org/W3196295870","https://openalex.org/W3207645655","https://openalex.org/W4205952419","https://openalex.org/W4206289265","https://openalex.org/W4220850685","https://openalex.org/W4220967350","https://openalex.org/W4221143046","https://openalex.org/W4226079124","https://openalex.org/W4280496502","https://openalex.org/W4280611847","https://openalex.org/W4281651027","https://openalex.org/W4281922990","https://openalex.org/W4285212262","https://openalex.org/W4286900001","https://openalex.org/W4287391717","https://openalex.org/W4287704453","https://openalex.org/W4287777801","https://openalex.org/W4288347855","https://openalex.org/W4289302788","https://openalex.org/W4308760184","https://openalex.org/W4310510250","https://openalex.org/W4311327542","https://openalex.org/W4312820606","https://openalex.org/W4318541538","https://openalex.org/W4318541554","https://openalex.org/W4321500171","https://openalex.org/W4321636575","https://openalex.org/W4327911434","https://openalex.org/W4361000555","https://openalex.org/W4362559429","https://openalex.org/W4364382874","https://openalex.org/W4377079846","https://openalex.org/W4380874786","https://openalex.org/W4384211302","https://openalex.org/W4384705353","https://openalex.org/W4385488663","https://openalex.org/W4385562710","https://openalex.org/W4385570483","https://openalex.org/W4385571187","https://openalex.org/W4385571586","https://openalex.org/W4386191499","https://openalex.org/W4387302777","https://openalex.org/W4387321091","https://openalex.org/W4387321503","https://openalex.org/W4388031315","https://openalex.org/W4388093177","https://openalex.org/W4388874804","https://openalex.org/W4389518760","https://openalex.org/W4389519226","https://openalex.org/W4389520033","https://openalex.org/W4389523718","https://openalex.org/W4389524473","https://openalex.org/W4389524555","https://openalex.org/W4389576338","https://openalex.org/W4394998727","https://openalex.org/W4395020691","https://openalex.org/W4395112660","https://openalex.org/W4398233813","https://openalex.org/W4401018280","https://openalex.org/W4401211704","https://openalex.org/W4402671659","https://openalex.org/W4402671766","https://openalex.org/W4402671835","https://openalex.org/W4402672007","https://openalex.org/W4404401017","https://openalex.org/W4404401018","https://openalex.org/W4404782823","https://openalex.org/W4406164090","https://openalex.org/W4406650295","https://openalex.org/W4407197044","https://openalex.org/W4407217670","https://openalex.org/W4407218342","https://openalex.org/W4408105642","https://openalex.org/W4408750063","https://openalex.org/W4408844835","https://openalex.org/W4408903581","https://openalex.org/W4411120381","https://openalex.org/W4411630296","https://openalex.org/W4411638748"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2380075625","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345"],"abstract_inverted_index":{"In":[0],"the":[1,16,26,56,74,113,132,140],"rapidly":[2],"evolving":[3],"landscape":[4],"of":[5,32,41,76,91,112,134,142],"artificial":[6],"intelligence":[7],"(AI),":[8],"generative":[9],"large":[10],"language":[11],"models":[12,35],"(LLMs)":[13],"stand":[14],"at":[15,73],"forefront,":[17],"revolutionizing":[18],"how":[19],"we":[20],"interact":[21],"with":[22],"our":[23],"data.":[24],"However,":[25],"computational":[27],"intensity":[28],"and":[29,50,80,116,128],"memory":[30],"consumption":[31],"deploying":[33],"these":[34],"present":[36],"substantial":[37],"challenges":[38],"in":[39,45,101,119,130],"terms":[40],"serving":[42,62],"efficiency,":[43],"particularly":[44],"scenarios":[46],"demanding":[47],"low":[48],"latency":[49],"high":[51],"throughput.":[52],"This":[53],"survey":[54,105],"addresses":[55],"imperative":[57],"need":[58],"for":[59,126],"efficient":[60,120],"LLM":[61,121,136],"methodologies":[63],"from":[64,94],"a":[65,89,109],"machine":[66],"learning":[67],"system":[68,82,102],"(MLSys)":[69],"research":[70],"perspective,":[71],"standing":[72],"crux":[75],"advanced":[77],"AI":[78],"innovations":[79],"practical":[81],"optimizations.":[83],"We":[84],"provide":[85,108],"in-depth":[86],"analysis,":[87],"covering":[88],"spectrum":[90],"solutions,":[92],"ranging":[93],"cutting-edge":[95],"algorithmic":[96],"modifications":[97],"to":[98,107],"groundbreaking":[99],"changes":[100],"designs.":[103],"The":[104],"aims":[106],"comprehensive":[110],"understanding":[111],"current":[114],"state":[115],"future":[117,141],"directions":[118],"serving,":[122],"offering":[123],"valuable":[124],"insights":[125],"researchers":[127],"practitioners":[129],"overcoming":[131],"barriers":[133],"effective":[135],"deployment,":[137],"thereby":[138],"reshaping":[139],"AI.":[143]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
