{"id":"https://openalex.org/W4414688324","doi":"https://doi.org/10.1145/3766882.3767173","title":"Frontier: Simulating the Next Generation of LLM Inference Systems","display_name":"Frontier: Simulating the Next Generation of LLM Inference Systems","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414688324","doi":"https://doi.org/10.1145/3766882.3767173"},"language":"en","primary_location":{"id":"doi:10.1145/3766882.3767173","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3766882.3767173","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Practical Adoption Challenges of ML for Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3766882.3767173","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100609931","display_name":"Yicheng Feng","orcid":"https://orcid.org/0000-0002-4652-7794"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yicheng Feng","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-4652-7794","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008826172","display_name":"Xin Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xin Tan","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0003-3785-9700","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119790805","display_name":"Kin Hang Sew","orcid":null},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Kin Hang Sew","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0009-0007-6235-1577","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079353641","display_name":"Yimin Jiang","orcid":"https://orcid.org/0009-0001-0049-873X"},"institutions":[{"id":"https://openalex.org/I4210140476","display_name":"Shanghai Zhaozhan Metal Materials","ror":"https://ror.org/03wtw1749","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210140476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yimin Jiang","raw_affiliation_strings":["StepFun, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0001-0049-873X","affiliations":[{"raw_affiliation_string":"StepFun, Shanghai, China","institution_ids":["https://openalex.org/I4210140476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028120333","display_name":"Yibo Zhu","orcid":"https://orcid.org/0000-0002-9113-2660"},"institutions":[{"id":"https://openalex.org/I4210140476","display_name":"Shanghai Zhaozhan Metal Materials","ror":"https://ror.org/03wtw1749","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210140476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yibo Zhu","raw_affiliation_strings":["StepFun, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9113-2660","affiliations":[{"raw_affiliation_string":"StepFun, Shanghai, China","institution_ids":["https://openalex.org/I4210140476"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022420830","display_name":"Hong Xu","orcid":"https://orcid.org/0000-0002-9359-9571"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hong Xu","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-9359-9571","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100609931"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30569374,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"25","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9035000205039978,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7515000104904175},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6187999844551086},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.4088999927043915},{"id":"https://openalex.org/keywords/backward-chaining","display_name":"Backward chaining","score":0.37689998745918274},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.3529999852180481},{"id":"https://openalex.org/keywords/expert-system","display_name":"Expert system","score":0.352400004863739},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3425999879837036}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8062999844551086},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7515000104904175},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6187999844551086},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.4088999927043915},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3797000050544739},{"id":"https://openalex.org/C129916263","wikidata":"https://www.wikidata.org/wiki/Q1141183","display_name":"Backward chaining","level":4,"score":0.37689998745918274},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3529999852180481},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.352400004863739},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3425999879837036},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33469998836517334},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3346000015735626},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31529998779296875},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.30979999899864197},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C33724603","wikidata":"https://www.wikidata.org/wiki/Q812540","display_name":"Bayesian network","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C95167961","wikidata":"https://www.wikidata.org/wiki/Q4483495","display_name":"Fiducial inference","level":5,"score":0.26260000467300415},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2614000141620636},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26109999418258667},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.260699987411499},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C31352089","wikidata":"https://www.wikidata.org/wiki/Q3750474","display_name":"Systems design","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3766882.3767173","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3766882.3767173","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Practical Adoption Challenges of ML for Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3766882.3767173","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3766882.3767173","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Practical Adoption Challenges of ML for Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2734941459","https://openalex.org/W4376652719","https://openalex.org/W4387321091","https://openalex.org/W4396822035","https://openalex.org/W4404848672"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Model":[2],"(LLM)":[3],"inference":[4,90,138],"is":[5],"growing":[6],"increasingly":[7],"complex":[8,100],"with":[9,91],"the":[10,38,50,66,97,128,134],"rise":[11],"of":[12,41,54,99,136],"Mixture-of-Experts":[13],"(MoE)":[14],"models":[15,122],"and":[16,44,82,106,116,132],"disaggregated":[17,83],"architectures":[18],"that":[19],"decouple":[20],"components":[21],"like":[22,102],"prefill/decode":[23],"(PD)":[24],"or":[25],"attention/FFN":[26],"(AF)":[27],"for":[28,69,88,110,123],"heterogeneous":[29],"scaling.":[30],"Existing":[31],"simulators,":[32],"however,":[33],"fall":[34],"short":[35],"in":[36],"modeling":[37],"system-level":[39],"complexities":[40],"distributed":[42],"serving,":[43],"thus":[45],"are":[46],"unable":[47],"to":[48,78,130],"capture":[49],"intricate":[51],"system":[52],"dynamics":[53],"these":[55],"emerging":[56],"paradigms.":[57],"We":[58],"present":[59],"Frontier,":[60],"a":[61,75],"high-fidelity":[62],"simulator":[63],"designed":[64],"from":[65],"ground":[67],"up":[68],"this":[70],"new":[71],"landscape.":[72],"Frontier":[73,118,126],"introduces":[74],"unified":[76],"framework":[77],"model":[79],"both":[80],"co-located":[81],"systems,":[84],"providing":[85],"native":[86],"support":[87],"MoE":[89],"expert":[92,104],"parallelism":[93],"(EP).":[94],"It":[95],"enables":[96],"simulation":[98],"workflows":[101],"cross-cluster":[103],"routing":[105],"advanced":[107],"pipelining":[108],"strategies":[109],"latency":[111],"hiding.":[112],"To":[113],"ensure":[114],"fidelity":[115],"usability,":[117],"incorporates":[119],"refined":[120],"operator":[121],"improved":[124],"accuracy.":[125],"empowers":[127],"community":[129],"design":[131],"optimize":[133],"future":[135],"LLM":[137],"at":[139],"scale.":[140]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
