{"id":"https://openalex.org/W4414736068","doi":"https://doi.org/10.1145/3731569.3764843","title":"KTransformers: Unleashing the Full Potential of CPU/GPU Hybrid Inference for MoE Models","display_name":"KTransformers: Unleashing the Full Potential of CPU/GPU Hybrid Inference for MoE Models","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414736068","doi":"https://doi.org/10.1145/3731569.3764843"},"language":"en","primary_location":{"id":"doi:10.1145/3731569.3764843","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764843","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731569.3764843","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109745376","display_name":"Hongtao Chen","orcid":"https://orcid.org/0000-0001-5110-0413"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongtao Chen","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101569590","display_name":"Weiyu Xie","orcid":"https://orcid.org/0000-0003-0173-1027"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiyu Xie","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074159707","display_name":"Boxin Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Boxin Zhang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101565422","display_name":"Jiren Tang","orcid":"https://orcid.org/0009-0006-3300-9425"},"institutions":[{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jingqi Tang","raw_affiliation_strings":["Approaching.AI, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Approaching.AI, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101789783","display_name":"Jiahao Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]},{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Jiahao Wang","raw_affiliation_strings":["Approaching.Al, Beijing, China","Hangzhou Dianzi University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Approaching.Al, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]},{"raw_affiliation_string":"Hangzhou Dianzi University, Hangzhou, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057202240","display_name":"Jianwei Dong","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwei Dong","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102307049","display_name":"Shaoyuan Chen","orcid":"https://orcid.org/0000-0003-3526-3241"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaoyuan Chen","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102441532","display_name":"Ziwei Yuan","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]},{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Ziwei Yuan","raw_affiliation_strings":["Approaching.AI, Beijing, China","University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Approaching.AI, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]},{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112884242","display_name":"Lin Chen","orcid":"https://orcid.org/0009-0003-6995-8137"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Lin","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068640694","display_name":"Qiu Chuang-yi","orcid":"https://orcid.org/0009-0003-4422-9368"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengyu Qiu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103487438","display_name":"Yongwei Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuening Zhu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119806982","display_name":"Qingliang Ou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]},{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Qingliang Ou","raw_affiliation_strings":["Approaching.AI, Beijing, China","Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Approaching.AI, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]},{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112259445","display_name":"Jason A. Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]},{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]}],"countries":["CN","GB"],"is_corresponding":false,"raw_author_name":"Jiaqi Liao","raw_affiliation_strings":["Approaching.AI, Beijing, China","Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Approaching.AI, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]},{"raw_affiliation_string":"Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053064689","display_name":"X. T. Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xianglin Chen","raw_affiliation_strings":["Approaching.AI, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Approaching.AI, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113120309","display_name":"Z.W. Ai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131127","display_name":"Sound Approach (United Kingdom)","ror":"https://ror.org/02k9x1e74","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210131127"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zhiyuan Ai","raw_affiliation_strings":["Approaching.AI, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Approaching.AI, Beijing, China","institution_ids":["https://openalex.org/I4210131127"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602897","display_name":"Yongwei Wu","orcid":"https://orcid.org/0000-0002-6651-7032"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongwei Wu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090106924","display_name":"Mingxing Zhang","orcid":"https://orcid.org/0000-0001-7518-0753"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingxing Zhang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":17,"corresponding_author_ids":["https://openalex.org/A5109745376"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":5.4906,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.95935697,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1014","last_page":"1029"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.5480999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.5480999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.52920001745224,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hybrid-system","display_name":"Hybrid system","score":0.5239999890327454},{"id":"https://openalex.org/keywords/synchronization","display_name":"Synchronization (alternating current)","score":0.4562999904155731},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4480000138282776},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4009000062942505},{"id":"https://openalex.org/keywords/hybrid-computer","display_name":"Hybrid computer","score":0.36410000920295715}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6353999972343445},{"id":"https://openalex.org/C50897621","wikidata":"https://www.wikidata.org/wiki/Q2665508","display_name":"Hybrid system","level":2,"score":0.5239999890327454},{"id":"https://openalex.org/C2778562939","wikidata":"https://www.wikidata.org/wiki/Q1298791","display_name":"Synchronization (alternating current)","level":3,"score":0.4562999904155731},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4480000138282776},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C18789546","wikidata":"https://www.wikidata.org/wiki/Q1341206","display_name":"Hybrid computer","level":2,"score":0.36410000920295715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36000001430511475},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3580000102519989},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3352000117301941},{"id":"https://openalex.org/C3018790387","wikidata":"https://www.wikidata.org/wiki/Q869010","display_name":"Hybrid learning","level":2,"score":0.26080000400543213}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731569.3764843","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764843","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731569.3764843","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764843","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2623399293","https://openalex.org/W2767510344","https://openalex.org/W4236965008","https://openalex.org/W4240517907","https://openalex.org/W4287391717","https://openalex.org/W4295312788","https://openalex.org/W4387302738","https://openalex.org/W4388093177","https://openalex.org/W4399424773","https://openalex.org/W4401211627","https://openalex.org/W4404400739","https://openalex.org/W4408017080","https://openalex.org/W4409282496"],"related_works":[],"abstract_inverted_index":{"Due":[0],"to":[1,57],"the":[2,26,34,66],"sparse":[3],"nature":[4],"of":[5,31,37],"Mixture-of-Experts":[6],"(MoE)":[7],"models,":[8,63],"they":[9],"are":[10],"particularly":[11],"suitable":[12],"for":[13],"hybrid":[14,22,41],"CPU/GPU":[15],"inference,":[16],"especially":[17],"in":[18],"low-concurrency":[19],"scenarios.":[20],"This":[21],"approach":[23],"leverages":[24],"both":[25],"large,":[27],"cost-effective":[28],"memory":[29],"capacity":[30],"CPU/DRAM":[32],"and":[33,49],"high":[35],"bandwidth":[36],"GPU/VRAM.":[38],"However,":[39],"existing":[40],"solutions":[42],"remain":[43],"bottlenecked":[44],"by":[45],"CPU":[46],"computation":[47],"limits":[48],"CPU-GPU":[50],"synchronization":[51],"overheads,":[52],"severely":[53],"restricting":[54],"their":[55],"ability":[56],"efficiently":[58],"run":[59],"state-of-the-art":[60],"large":[61],"MoE":[62],"such":[64],"as":[65],"671B":[67],"DeepSeek-V3/R1.":[68]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
