{"id":"https://openalex.org/W7134937227","doi":"https://doi.org/10.1109/asp-dac66049.2026.11420504","title":"DeepPiC: xPU-PIM Cluster Architecture with Adaptive Resource-Aware Task Orchestration for DeepSeek-Style MoE Inference","display_name":"DeepPiC: xPU-PIM Cluster Architecture with Adaptive Resource-Aware Task Orchestration for DeepSeek-Style MoE Inference","publication_year":2026,"publication_date":"2026-01-19","ids":{"openalex":"https://openalex.org/W7134937227","doi":"https://doi.org/10.1109/asp-dac66049.2026.11420504"},"language":null,"primary_location":{"id":"doi:10.1109/asp-dac66049.2026.11420504","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asp-dac66049.2026.11420504","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 31st Asia and South Pacific Design Automation Conference (ASP-DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040460362","display_name":"Zixu Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zixu Li","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128692088","display_name":"Manni Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Manni Li","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042158652","display_name":"Zijian Huang","orcid":"https://orcid.org/0000-0003-3344-4962"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zijian Huang","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100759140","display_name":"Jiayu Yang","orcid":"https://orcid.org/0000-0002-9718-0524"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayu Yang","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128703087","display_name":"Wending Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wending Zhao","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088665704","display_name":"Yinyin Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinyin Lin","raw_affiliation_strings":["Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128705908","display_name":"Chengchen Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengchen Wang","raw_affiliation_strings":["State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China","institution_ids":["https://openalex.org/I4210098582"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061918116","display_name":"Haidong Tian","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haidong Tian","raw_affiliation_strings":["State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China","institution_ids":["https://openalex.org/I4210098582"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5128785278","display_name":"Xiankui Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I4210098582","display_name":"ZTE (China)","ror":"https://ror.org/00rjhhq63","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210098582"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiankui Xiong","raw_affiliation_strings":["State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China","institution_ids":["https://openalex.org/I4210098582"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5040460362"],"corresponding_institution_ids":["https://openalex.org/I4210132426"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86467545,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"311","last_page":"317"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.16840000450611115,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.16840000450611115,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.10140000283718109,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.07090000063180923,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6202999949455261},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.46160000562667847},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.44690001010894775},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.43799999356269836},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.3569999933242798},{"id":"https://openalex.org/keywords/systems-architecture","display_name":"Systems architecture","score":0.27880001068115234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7063999772071838},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6202999949455261},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44999998807907104},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.44690001010894775},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.43799999356269836},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3569999933242798},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.27880001068115234},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26100000739097595},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2558000087738037},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asp-dac66049.2026.11420504","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asp-dac66049.2026.11420504","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 31st Asia and South Pacific Design Automation Conference (ASP-DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W3100710793","https://openalex.org/W3134274954","https://openalex.org/W3189166979","https://openalex.org/W4295036115","https://openalex.org/W4297097348","https://openalex.org/W4387064057","https://openalex.org/W4388031315","https://openalex.org/W4392427708","https://openalex.org/W4394998968","https://openalex.org/W4395073431","https://openalex.org/W4408861504","https://openalex.org/W4409248734","https://openalex.org/W4411486231"],"related_works":[],"abstract_inverted_index":{"The":[0],"success":[1],"of":[2,26,75,261],"DeepSeek":[3,19,49,202],"has":[4],"driven":[5,150],"demand":[6],"for":[7,48,64,93],"deploying":[8],"high-performance":[9],"inference":[10,46],"clusters.":[11],"However,":[12,129],"due":[13,52],"to":[14,39,53,79,109,125,142,144,220,227,280],"its":[15,43,134],"Transformer-based":[16],"autoregressive":[17],"structure,":[18],"remains":[20,50],"severely":[21],"bandwidth-bound,":[22],"limiting":[23],"the":[24,62,68],"scalability":[25],"traditional":[27],"xPU":[28,120,279,299],"(e.g.,":[29],"GPU/TPU).":[30],"While":[31],"DRAM-based":[32],"processing-inmemory":[33],"(PIM)":[34],"offers":[35],"a":[36,87,105,176,217],"promising":[37],"solution":[38],"overcome":[40],"memory":[41,148,290],"bottlenecks,":[42],"use":[44],"in":[45,67],"clusters":[47],"underexplored":[51],"three":[54],"challenges:":[55],"(1)":[56,191],"non-trivial":[57],"inter-device":[58,162],"communication":[59,163],"overhead;":[60],"(2)":[61,196],"need":[63],"expert":[65],"parallelism":[66,193],"mixture-of-experts":[69],"(MoE)":[70],"module;":[71],"and":[72,100,147,165,195,207,233,243,257,298],"(3)":[73],"lack":[74],"efficient":[76],"task":[77,186],"offloading":[78],"PIM.":[80],"To":[81],"this":[82],"end,":[83],"we":[84,169],"propose":[85,170],"DeepPiC,":[86],"novel":[88],"xPU-PIM":[89],"cluster":[90],"architecture":[91],"designed":[92],"DeepSeek-style":[94],"models":[95],"with":[96],"multi-latent":[97],"attention":[98],"(MLA)":[99],"MoE":[101],"modules.":[102],"DeepPiC":[103,130,223,276],"introduces":[104],"heterogeneous":[106,300],"xPU+HBM-PIM":[107],"device":[108],"accelerate":[110],"low":[111],"arithmetic":[112],"intensity":[113],"operations.":[114],"It":[115],"can":[116],"seamlessly":[117],"replace":[118],"conventional":[119],"devices":[121],"without":[122],"any":[123],"modification":[124],"clusterlevel":[126],"interconnect":[127],"topology.":[128],"cannot":[131],"fully":[132],"realize":[133],"performance":[135,267],"potential":[136],"under":[137],"static":[138],"scheduling,":[139],"which":[140],"fails":[141],"adapt":[143],"shifting":[145],"compute":[146],"demands":[149],"by":[151,188,287],"multidimensional":[152],"variability":[153],"(model":[154],"heterogeneity,":[155],"cluster-scale":[156],"volatility,":[157],"runtime":[158],"dynamics).":[159],"This":[160],"induces":[161],"overhead":[164],"intra-device":[166,197],"underutilization.":[167],"Thus,":[168],"Adaptive":[171],"Resource-Aware":[172],"Task":[173],"Orchestration":[174],"(ARTO),":[175],"two-phase":[177],"strategy":[178],"that":[179,275,295],"decouples":[180],"global":[181],"model":[182],"partitioning":[183],"from":[184],"local":[185],"assignment":[187],"dynamically":[189],"coordinating":[190],"crossdevice":[192],"optimization":[194],"xPU/PIM":[198],"mapping.":[199],"Evaluated":[200],"on":[201],"V3-671B":[203],"using":[204],"H20-,":[205],"A100-,":[206,242],"$\\mathbf{H":[208,239,244,263],"2":[209,213,240,245,264],"0":[210,246,265],"0}$-Cluster":[211,247,266],"($\\mathbf{H":[212],"0}$":[214],"serves":[215],"as":[216],"compute-limited":[218],"alternative":[219],"high-end":[221],"GPUs),":[222],"(H20+HBM-PIM)":[224],"achieves":[225],"up":[226],"$\\mathbf{3}":[228],"\\times":[229],"\\mathbf{,":[230],"2}":[231],"\\times$":[232,236],"$\\mathbf{1.":[234],"3}":[235],"speedup":[237],"over":[238],"0}$-,":[241],"at":[248,268],"small":[249],"batch":[250,270],"sizes,":[251],"while":[252],"maintaining":[253],"$\\mathbf{7":[254],"4":[255,259],"\\%}$":[256,260],"$\\mathbf{5":[258],"A100and":[262],"large":[269],"sizes.":[271],"These":[272],"results":[273],"demonstrate":[274],"enables":[277],"low-end":[278],"approach":[281],"or":[282],"even":[283],"exceed":[284],"premium":[285],"ones":[286],"fundamentally":[288],"overcoming":[289],"bottlenecks":[291],"via":[292],"adaptive":[293],"scheduling":[294],"orchestrates":[296],"PIM":[297],"resources.":[301]},"counts_by_year":[],"updated_date":"2026-03-13T14:20:09.374765","created_date":"2026-03-12T00:00:00"}
