{"id":"https://openalex.org/W4387064008","doi":"https://doi.org/10.1109/hcs59251.2023.10254703","title":"A Scalable Multi-Chiplet Deep Learning Accelerator with Hub-Side 2.5D Heterogeneous Integration","display_name":"A Scalable Multi-Chiplet Deep Learning Accelerator with Hub-Side 2.5D Heterogeneous Integration","publication_year":2023,"publication_date":"2023-08-27","ids":{"openalex":"https://openalex.org/W4387064008","doi":"https://doi.org/10.1109/hcs59251.2023.10254703"},"language":"en","primary_location":{"id":"doi:10.1109/hcs59251.2023.10254703","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hcs59251.2023.10254703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Hot Chips 35 Symposium (HCS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083108300","display_name":"Zhanhong Tan","orcid":"https://orcid.org/0000-0001-6725-3211"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhanhong Tan","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029720510","display_name":"Yifu Wu","orcid":"https://orcid.org/0000-0001-5132-2980"},"institutions":[{"id":"https://openalex.org/I4210135473","display_name":"Polar (Finland)","ror":"https://ror.org/03xptpz88","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210135473"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yifu Wu","raw_affiliation_strings":["Polar Bear Tech"],"affiliations":[{"raw_affiliation_string":"Polar Bear Tech","institution_ids":["https://openalex.org/I4210135473"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014891642","display_name":"Yannian Zhang","orcid":"https://orcid.org/0000-0001-7772-0063"},"institutions":[{"id":"https://openalex.org/I4210135473","display_name":"Polar (Finland)","ror":"https://ror.org/03xptpz88","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210135473"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yannian Zhang","raw_affiliation_strings":["Polar Bear Tech"],"affiliations":[{"raw_affiliation_string":"Polar Bear Tech","institution_ids":["https://openalex.org/I4210135473"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032944232","display_name":"Haobing Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135473","display_name":"Polar (Finland)","ror":"https://ror.org/03xptpz88","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210135473"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Haobing Shi","raw_affiliation_strings":["Polar Bear Tech"],"affiliations":[{"raw_affiliation_string":"Polar Bear Tech","institution_ids":["https://openalex.org/I4210135473"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074197504","display_name":"Wuke Zhang","orcid":"https://orcid.org/0000-0003-1213-413X"},"institutions":[{"id":"https://openalex.org/I4210135473","display_name":"Polar (Finland)","ror":"https://ror.org/03xptpz88","country_code":"FI","type":"company","lineage":["https://openalex.org/I4210135473"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Wuke Zhang","raw_affiliation_strings":["Polar Bear Tech"],"affiliations":[{"raw_affiliation_string":"Polar Bear Tech","institution_ids":["https://openalex.org/I4210135473"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006570986","display_name":"Kaisheng Ma","orcid":"https://orcid.org/0000-0001-9226-3366"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaisheng Ma","raw_affiliation_strings":["Tsinghua University"],"affiliations":[{"raw_affiliation_string":"Tsinghua University","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5083108300"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.5378,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.81972362,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9760000109672546,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8188419342041016},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7062499523162842},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5831727385520935},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5346288681030273},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4208185374736786},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3973461091518402},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3784247934818268},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3728266954421997},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.190720796585083},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.15063706040382385}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8188419342041016},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7062499523162842},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5831727385520935},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5346288681030273},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4208185374736786},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3973461091518402},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3784247934818268},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3728266954421997},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.190720796585083},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.15063706040382385},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hcs59251.2023.10254703","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hcs59251.2023.10254703","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE Hot Chips 35 Symposium (HCS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2364921833","https://openalex.org/W2385146268","https://openalex.org/W1569389315","https://openalex.org/W2503642292","https://openalex.org/W2380023786","https://openalex.org/W2546696010","https://openalex.org/W1992741870","https://openalex.org/W1993148641","https://openalex.org/W2687174480","https://openalex.org/W1975010174"],"abstract_inverted_index":{"With":[0],"the":[1,6,13,54,60,148,188,200,207],"slowdown":[2],"of":[3,9,16,177,199,206],"Moore's":[4],"law,":[5],"scenario":[7],"diversity":[8],"specialized":[10],"computing,":[11],"and":[12,26,59,64,106,130,145,165],"rapid":[14],"development":[15],"application":[17],"algorithms,":[18],"an":[19,50],"efficient":[20,115],"chip":[21],"design":[22,92],"requires":[23],"modularization,":[24],"flexibility,":[25],"scalability.":[27],"In":[28],"this":[29],"study,":[30],"we":[31,81,134],"propose_":[32],"a":[33,89,99,109,121,136,174],"Chiplet,-based":[34],"deep,":[35],"learning":[36],"accelerator":[37,172],"protoype":[38],"that":[39,70,205],"-contains":[40],"oneHUB":[41],".":[42],"Chipletand,":[43],"six.":[44],"extended":[45],"SIDE-":[46],"Chiplets":[47],"integrated":[48],"on":[49],"RDL":[51],"layer":[52],"for":[53,98,181],"2.5D":[55],"package.":[56],"The":[57,169,196],"SIDE":[58,79,186],"HUB":[61],"contain":[62],"one":[63],"four":[65],"AI":[66],"cores,":[67],"respectively.":[68],"Given":[69],"our":[71],"Chiplet-system":[72],"targets":[73],"diverse":[74,94],"scenarios":[75],"via":[76],"scalable":[77],"connected":[78],"Chiplets,":[80,187],"need":[82],"to":[83,113,140,155,194],"handle":[84],"three":[85],"challenges:":[86],"a)":[87],"devise":[88],"flexible":[90,122,131],"architecture":[91],"supporting":[93],"shapes,":[95],"b)":[96],"search":[97],"workload":[100],"mapping":[101],"with":[102,162],"low":[103],"die-to-die":[104,111],"communication,":[105],"c)":[107],"adopt":[108],"high-bandwidth":[110],"interface":[112,152],"maintain":[114],"data":[116],"transfer.":[117],"This":[118],"study":[119],"proposes":[120],"neural":[123],"core":[124],"(FNC)":[125],"featuring":[126],"dynamic":[127],"bit-width":[128],"computing":[129],"parallelism.":[132],"Next,":[133],"use":[135],"hierarchy-based":[137],"mapping.":[138],"scheme":[139],"decouple":[141],"different":[142],"parallelism":[143],"levels":[144],"help":[146],"analyze":[147],"communication.":[149],"A":[150],"12Gbps,_D2D":[151],"is":[153,202,210],"introduced":[154],"achieve":[156],"192Gb/s":[157],"bandwidth":[158],"per":[159],"D2D":[160],"port":[161],"1.04pJ/bit":[163],"efficiency":[164,198],"55um":[166],"bump":[167],"pitch.":[168],"proposed":[170],"seven-Chiplet":[171],"achieves":[173],"peak":[175],"performance":[176],"1":[178],"0/20/40":[179],"TOPS":[180],"INT16/8/4.":[182],"When":[183],"enabling":[184],"0~6":[185],"system":[189,209],"power":[190,197],"ranges":[191],"from":[192],"4.5W":[193],"12W.":[195],"FNC":[201],"2.02TOPS/W":[203],"while":[204],"overall":[208],"1.67TOPS/W.":[211]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
