{"id":"https://openalex.org/W7133528682","doi":"https://doi.org/10.1109/hpca68181.2026.11408536","title":"COMET: Communication and Memory Co-Design for Fine-Grained AI Inference in MCM Accelerators","display_name":"COMET: Communication and Memory Co-Design for Fine-Grained AI Inference in MCM Accelerators","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133528682","doi":"https://doi.org/10.1109/hpca68181.2026.11408536"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408536","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048181464","display_name":"Taishu Sheng","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Taishu Sheng","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology,HiNA Lab"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology,HiNA Lab","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089719475","display_name":"Guangyu Sun","orcid":"https://orcid.org/0000-0002-7315-6589"},"institutions":[{"id":"https://openalex.org/I4210124274","display_name":"Fraunhofer Institute for Integrated Circuits","ror":"https://ror.org/024ape423","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210124274","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Guangyu Sun","raw_affiliation_strings":["School of Integrated Circuits, Peking University, and Beijing Advanced Innovation Center for Integrated Circuits"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, Peking University, and Beijing Advanced Innovation Center for Integrated Circuits","institution_ids":["https://openalex.org/I4210124274"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5128045202","display_name":"Dezun Dong","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezun Dong","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology,HiNA Lab"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology,HiNA Lab","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5048181464"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.40623197,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.23499999940395355,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.23499999940395355,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.08079999685287476,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.44339999556541443},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.28459998965263367},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2612999975681305},{"id":"https://openalex.org/keywords/applications-of-artificial-intelligence","display_name":"Applications of artificial intelligence","score":0.21050000190734863}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7001000046730042},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4844000041484833},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.44339999556541443},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.35679998993873596},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2736999988555908},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C157170001","wikidata":"https://www.wikidata.org/wiki/Q4781507","display_name":"Applications of artificial intelligence","level":2,"score":0.21050000190734863},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.20759999752044678},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.20479999482631683}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408536","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1422208591","display_name":null,"funder_award_id":"L243001","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G3536056053","display_name":null,"funder_award_id":"2022YFB4501702","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G6847781164","display_name":null,"funder_award_id":"B18001","funder_id":"https://openalex.org/F4320327912","funder_display_name":"Higher Education Discipline Innovation Project"},{"id":"https://openalex.org/G8067929453","display_name":null,"funder_award_id":"62421002,U24B20151","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null},{"id":"https://openalex.org/F4320327912","display_name":"Higher Education Discipline Innovation Project","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W1536680647","https://openalex.org/W1978111236","https://openalex.org/W2025516544","https://openalex.org/W2118231264","https://openalex.org/W2147657366","https://openalex.org/W2157225945","https://openalex.org/W2168809519","https://openalex.org/W2194775991","https://openalex.org/W2549139847","https://openalex.org/W2605350416","https://openalex.org/W2618530766","https://openalex.org/W2766447205","https://openalex.org/W2884367402","https://openalex.org/W2935331687","https://openalex.org/W2945146780","https://openalex.org/W2973166032","https://openalex.org/W2980104813","https://openalex.org/W2981474208","https://openalex.org/W3006507836","https://openalex.org/W3012493694","https://openalex.org/W3016082253","https://openalex.org/W3028999579","https://openalex.org/W3047390932","https://openalex.org/W3092226368","https://openalex.org/W3135242540","https://openalex.org/W3188178661","https://openalex.org/W3190062760","https://openalex.org/W3192636176","https://openalex.org/W4246193833","https://openalex.org/W4296209134","https://openalex.org/W4320002812","https://openalex.org/W4352977393","https://openalex.org/W4376130831","https://openalex.org/W4385730099","https://openalex.org/W4391932394","https://openalex.org/W4393406935","https://openalex.org/W4393592481","https://openalex.org/W4396782780","https://openalex.org/W4401212084","https://openalex.org/W4404955380","https://openalex.org/W4409248543","https://openalex.org/W4409282593","https://openalex.org/W4411487111","https://openalex.org/W4413755464"],"related_works":[],"abstract_inverted_index":{"Chiplet-based":[0],"architectures":[1],"emerge":[2],"as":[3],"a":[4,130],"promising":[5],"approach":[6],"to":[7,25,67,147,156,170],"overcoming":[8],"the":[9,19,27,43,73,79,106],"physical":[10],"and":[11,45,64,78,93,141,160,199,211],"manufacturing":[12],"constraints":[13],"faced":[14],"by":[15,54],"monolithic":[16],"chips,":[17],"enabling":[18],"scalable":[20],"integration":[21],"of":[22,30,75,81],"computing":[23],"resources":[24],"meet":[26],"growing":[28],"demands":[29],"AI":[31,59,91,121,181],"workloads.":[32,122,213],"However,":[33],"efficient":[34],"inter-chiplet":[35],"communication":[36,62,110,162,189],"still":[37],"faces":[38],"significant":[39],"bottlenecks,":[40],"especially":[41],"under":[42],"fine-grained":[44],"bursty":[46],"Direct":[47],"Memory":[48],"Access":[49],"(DMA)":[50],"request":[51,139],"patterns":[52],"generated":[53],"processing":[55],"elements":[56],"in":[57,89,119],"modern":[58],"tasks.":[60,182],"Existing":[61],"models":[63],"simulators":[65],"fail":[66],"capture":[68],"these":[69,102],"characteristics,":[70],"which":[71],"limits":[72],"accuracy":[74],"performance":[76],"analysis":[77],"effectiveness":[80],"optimization":[82],"strategies.":[83],"These":[84],"limitations":[85],"hinder":[86],"DMA-communication":[87],"inefficiencies":[88],"chiplet-based":[90],"systems":[92],"pose":[94],"challenges":[95],"for":[96,136],"designing":[97],"HPC":[98],"architectures.":[99],"To":[100],"address":[101,143],"challenges,":[103],"we":[104,127],"present":[105],"first":[107],"comprehensive":[108],"chiplet":[109,148,188],"model":[111],"that":[112,133],"explicitly":[113],"incorporates":[114],"finegrained":[115],"DMA":[116,138,154],"traffic":[117],"observed":[118],"realistic":[120],"Building":[123],"on":[124,186],"this":[125],"model,":[126],"propose":[128],"COMET,":[129],"novel":[131],"framework":[132],"intelligently":[134],"searches":[135],"optimal":[137],"aggregation":[140],"memory":[142,168],"mapping":[144,169],"strategies":[145],"tailored":[146],"environments.":[149],"COMET":[150,191],"dynamically":[151],"consolidates":[152],"small":[153],"transfers":[155],"improve":[157],"bandwidth":[158,206],"utilization":[159,207],"reduce":[161],"latency,":[163],"while":[164],"also":[165],"adapting":[166],"on-chip":[167],"align":[171],"with":[172,184],"workload-specific":[173],"dataflows.":[174],"This":[175],"mitigates":[176],"synchronization":[177],"overhead":[178],"across":[179,208],"diverse":[180],"Compared":[183],"inference":[185],"conventional":[187],"schemes,":[190],"achieves":[192],"<tex":[193,200],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[194,201],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.1":[195],"\\times-2.6":[196],"\\times$</tex>":[197,204],"speedup":[198],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1.5":[202],"\\times-4.4":[203],"higher":[205],"different":[209],"DNN":[210],"LLM":[212]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-03-05T00:00:00"}
