{"id":"https://openalex.org/W4406461405","doi":"https://doi.org/10.1109/bigdata62323.2024.10826121","title":"Performance Characterization of Expert Router for Scalable LLM Inference","display_name":"Performance Characterization of Expert Router for Scalable LLM Inference","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406461405","doi":"https://doi.org/10.1109/bigdata62323.2024.10826121"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10826121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10826121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086211130","display_name":"Josef Pichlmeier","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156768","display_name":"BMW Group (Germany)","ror":"https://ror.org/044kkbh92","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210156768"]},{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]},{"id":"https://openalex.org/I1283382300","display_name":"BMW (Germany)","ror":"https://ror.org/05vs9tj88","country_code":"DE","type":"company","lineage":["https://openalex.org/I1283382300","https://openalex.org/I4210156768"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Josef Pichlmeier","raw_affiliation_strings":["BMW Group Ludwig Maximilians Universit&#x00E4;t,Munich,Germany"],"affiliations":[{"raw_affiliation_string":"BMW Group Ludwig Maximilians Universit&#x00E4;t,Munich,Germany","institution_ids":["https://openalex.org/I4210156768","https://openalex.org/I8204097","https://openalex.org/I1283382300"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002603329","display_name":"Philipp Ross","orcid":"https://orcid.org/0000-0002-4720-9835"},"institutions":[{"id":"https://openalex.org/I4210156768","display_name":"BMW Group (Germany)","ror":"https://ror.org/044kkbh92","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210156768"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Philipp Ross","raw_affiliation_strings":["BMW Group,Munich,Germany"],"affiliations":[{"raw_affiliation_string":"BMW Group,Munich,Germany","institution_ids":["https://openalex.org/I4210156768"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077700715","display_name":"Andr\u00e9 Luckow","orcid":"https://orcid.org/0000-0002-1225-4062"},"institutions":[{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]},{"id":"https://openalex.org/I1283382300","display_name":"BMW (Germany)","ror":"https://ror.org/05vs9tj88","country_code":"DE","type":"company","lineage":["https://openalex.org/I1283382300","https://openalex.org/I4210156768"]},{"id":"https://openalex.org/I4210156768","display_name":"BMW Group (Germany)","ror":"https://ror.org/044kkbh92","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210156768"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Andre Luckow","raw_affiliation_strings":["BMW Group Ludwig Maximilians Universit&#x00E4;t,Munich,Germany"],"affiliations":[{"raw_affiliation_string":"BMW Group Ludwig Maximilians Universit&#x00E4;t,Munich,Germany","institution_ids":["https://openalex.org/I4210156768","https://openalex.org/I8204097","https://openalex.org/I1283382300"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086211130"],"corresponding_institution_ids":["https://openalex.org/I1283382300","https://openalex.org/I4210156768","https://openalex.org/I8204097"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.78370649,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1686","last_page":"1693"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.95169997215271,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10927","display_name":"Access Control and Trust","score":0.9297000169754028,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7267366647720337},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.684041440486908},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6801205277442932},{"id":"https://openalex.org/keywords/router","display_name":"Router","score":0.5846412777900696},{"id":"https://openalex.org/keywords/characterization","display_name":"Characterization (materials science)","score":0.5153181552886963},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3251025676727295},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2724268436431885},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.203054279088974},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1492656171321869}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7267366647720337},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.684041440486908},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6801205277442932},{"id":"https://openalex.org/C2775896111","wikidata":"https://www.wikidata.org/wiki/Q642560","display_name":"Router","level":2,"score":0.5846412777900696},{"id":"https://openalex.org/C2780841128","wikidata":"https://www.wikidata.org/wiki/Q5073781","display_name":"Characterization (materials science)","level":2,"score":0.5153181552886963},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3251025676727295},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2724268436431885},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.203054279088974},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1492656171321869},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10826121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10826121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W2101234009","https://openalex.org/W2281036777","https://openalex.org/W2990138404","https://openalex.org/W3043571714","https://openalex.org/W3117513126","https://openalex.org/W3195577433","https://openalex.org/W4288089799","https://openalex.org/W4361021241","https://openalex.org/W4376122773","https://openalex.org/W4377130677","https://openalex.org/W4378189609","https://openalex.org/W4379260375","https://openalex.org/W4385245566","https://openalex.org/W4387163503","https://openalex.org/W4387427491","https://openalex.org/W4387500346","https://openalex.org/W4387839213","https://openalex.org/W4388874804","https://openalex.org/W4389524114","https://openalex.org/W4390306161","https://openalex.org/W4391045969","https://openalex.org/W4391799129","https://openalex.org/W4392489911","https://openalex.org/W4395065783","https://openalex.org/W4396822035","https://openalex.org/W4400141368","https://openalex.org/W4401042932","https://openalex.org/W4401043635","https://openalex.org/W4401211704","https://openalex.org/W4401307699","https://openalex.org/W4401395592","https://openalex.org/W4404782361","https://openalex.org/W6675354045","https://openalex.org/W6695237428","https://openalex.org/W6769627184","https://openalex.org/W6800751262","https://openalex.org/W6849590751","https://openalex.org/W6852201467","https://openalex.org/W6852670370","https://openalex.org/W6853048723","https://openalex.org/W6853325866","https://openalex.org/W6853465110","https://openalex.org/W6856459790","https://openalex.org/W6857420757","https://openalex.org/W6860155063","https://openalex.org/W6860329836","https://openalex.org/W6861287844","https://openalex.org/W6861374585","https://openalex.org/W6862520094","https://openalex.org/W6865347877","https://openalex.org/W6867104577","https://openalex.org/W6869333693","https://openalex.org/W6871042906"],"related_works":["https://openalex.org/W2122026593","https://openalex.org/W3107994849","https://openalex.org/W2582203024","https://openalex.org/W1588358165","https://openalex.org/W4237683758","https://openalex.org/W2370711413","https://openalex.org/W2052038519","https://openalex.org/W2375932043","https://openalex.org/W4247143848","https://openalex.org/W2009883749"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"experienced":[5],"widespread":[6],"adoption":[7],"across":[8,151],"scientific":[9],"and":[10,17,24,33,45,99,137,172],"industrial":[11],"domains":[12],"due":[13],"to":[14,61,82,104,159],"their":[15],"versatility":[16],"utility":[18],"for":[19,51,170],"diverse":[20],"tasks.":[21],"Nevertheless,":[22],"deploying":[23],"serving":[25],"these":[26,63],"models":[27,49,96,123,133,147],"at":[28],"scale":[29],"with":[30,97,118],"optimal":[31],"throughput":[32,136],"latency":[34,116,138],"remains":[35],"a":[36,58,66,75,125,152],"significant":[37],"challenge,":[38],"primarily":[39],"because":[40],"of":[41,121,155,167],"LLMs\u2019":[42],"high":[43],"computational":[44],"memory":[46],"demands.":[47],"Specialized":[48],"optimized":[50],"specific":[52],"tasks":[53],"can":[54],"be":[55],"combined":[56],"through":[57],"routing":[59,77],"mechanism":[60],"address":[62],"challenges,":[64],"creating":[65],"modular":[67],"inference":[68],"system.":[69],"This":[70,163],"paper":[71],"introduces":[72,114],"Expert":[73,89,112,168],"Router,":[74],"scalable":[76,173],"architecture":[78],"that":[79,111],"directs":[80],"prompts":[81],"specialized":[83],"expert":[84,122,132,146],"models.":[85,162],"We":[86],"characterize":[87],"multiple":[88],"Router":[90,113,169],"configurations,":[91],"including":[92],"different":[93],"LLama":[94],"3":[95],"quantized":[98],"non-quantized":[100],"weights":[101],"under":[102,139],"up":[103],"1,000":[105],"concurrent":[106,156],"users.":[107],"Our":[108],"findings":[109],"reveal":[110],"minimal":[115],"overhead,":[117],"the":[119,165],"configuration":[120],"being":[124],"dominating":[126],"factor":[127],"in":[128],"performance":[129,150],"outcomes.":[130],"High-parameter":[131],"deliver":[134],"stable":[135],"moderate":[140],"concurrency":[141],"levels.":[142],"In":[143],"contrast,":[144],"smaller":[145],"maintain":[148],"competitive":[149],"wider":[153],"range":[154],"users":[157],"compared":[158],"tensor-parallelized":[160],"baseline":[161],"highlights":[164],"potential":[166],"efficient":[171],"LLM":[174],"deployment.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
