{"id":"https://openalex.org/W3193034122","doi":"https://doi.org/10.1109/tcsi.2021.3098841","title":"Scalable Fully Pipelined Hardware Architecture for In-Network Aggregated AllReduce Communication","display_name":"Scalable Fully Pipelined Hardware Architecture for In-Network Aggregated AllReduce Communication","publication_year":2021,"publication_date":"2021-07-29","ids":{"openalex":"https://openalex.org/W3193034122","doi":"https://doi.org/10.1109/tcsi.2021.3098841","mag":"3193034122"},"language":"en","primary_location":{"id":"doi:10.1109/tcsi.2021.3098841","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2021.3098841","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100620899","display_name":"Yao Liu","orcid":"https://orcid.org/0000-0003-2719-4055"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Yao Liu","raw_affiliation_strings":["Department of Electrical Engineering, City University of Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-2719-4055","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103102029","display_name":"Junyi Zhang","orcid":"https://orcid.org/0000-0002-5530-5659"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyi Zhang","raw_affiliation_strings":["Huawei Company, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-5530-5659","affiliations":[{"raw_affiliation_string":"Huawei Company, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026622212","display_name":"Shuo Liu","orcid":"https://orcid.org/0000-0002-3546-4070"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Liu","raw_affiliation_strings":["Huawei Company, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-3546-4070","affiliations":[{"raw_affiliation_string":"Huawei Company, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Qiaoling Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiaoling Wang","raw_affiliation_strings":["Huawei Company, Shenzhen, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Company, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075504049","display_name":"Wangchen Dai","orcid":"https://orcid.org/0000-0002-5192-1649"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wangchen Dai","raw_affiliation_strings":["ByteDance Ltd., Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-5192-1649","affiliations":[{"raw_affiliation_string":"ByteDance Ltd., Shenzhen, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077847474","display_name":"Ray C. C. Cheung","orcid":"https://orcid.org/0000-0002-6764-0729"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ray Chak Chung Cheung","raw_affiliation_strings":["Department of Electrical Engineering, City University of Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-6764-0729","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100620899"],"corresponding_institution_ids":["https://openalex.org/I168719708"],"apc_list":null,"apc_paid":null,"fwci":1.0167,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.75915338,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"68","issue":"10","first_page":"4194","last_page":"4206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7946997284889221},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.724594235420227},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7096903324127197},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5214016437530518},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.47772952914237976},{"id":"https://openalex.org/keywords/hardware-architecture","display_name":"Hardware architecture","score":0.4298717677593231},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3830711841583252},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.36733677983283997},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.3184940814971924},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.13697844743728638},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09450793266296387}],"concepts":[{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7946997284889221},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.724594235420227},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7096903324127197},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5214016437530518},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.47772952914237976},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.4298717677593231},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3830711841583252},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.36733677983283997},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3184940814971924},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.13697844743728638},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09450793266296387},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsi.2021.3098841","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsi.2021.3098841","pdf_url":null,"source":{"id":"https://openalex.org/S116977442","display_name":"IEEE Transactions on Circuits and Systems I Regular Papers","issn_l":"1549-8328","issn":["1549-8328","1558-0806"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems I: Regular Papers","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6299999952316284,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1442374986","https://openalex.org/W1686810756","https://openalex.org/W2000027649","https://openalex.org/W2036472141","https://openalex.org/W2060393849","https://openalex.org/W2097117768","https://openalex.org/W2134519279","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2498764059","https://openalex.org/W2525778437","https://openalex.org/W2612387305","https://openalex.org/W2740001873","https://openalex.org/W2787998955","https://openalex.org/W2899618753","https://openalex.org/W2913561863","https://openalex.org/W2922527104","https://openalex.org/W2926767350","https://openalex.org/W2949161920","https://openalex.org/W2955454939","https://openalex.org/W2963786636","https://openalex.org/W2964104075","https://openalex.org/W2973727699","https://openalex.org/W3004495293","https://openalex.org/W3037182822","https://openalex.org/W3087129161","https://openalex.org/W3101708369","https://openalex.org/W3121823381","https://openalex.org/W4253731216","https://openalex.org/W4301239768","https://openalex.org/W6628377381","https://openalex.org/W6637373629","https://openalex.org/W6684191040","https://openalex.org/W6748645090","https://openalex.org/W6760703128","https://openalex.org/W6767997687","https://openalex.org/W6783526588"],"related_works":["https://openalex.org/W2499279132","https://openalex.org/W1967938402","https://openalex.org/W2386041993","https://openalex.org/W1608572506","https://openalex.org/W2160474882","https://openalex.org/W121182129","https://openalex.org/W2281932057","https://openalex.org/W2585970387","https://openalex.org/W4230310076","https://openalex.org/W1966261340"],"abstract_inverted_index":{"The":[0,73],"Ring-AllReduce":[1],"framework":[2],"is":[3,75,95],"currently":[4],"the":[5,21,28,43,112,134,152,158,174],"most":[6],"popular":[7],"solution":[8],"to":[9,41,84,97,99,139,142,151],"deploy":[10],"industry-level":[11],"distributed":[12],"machine":[13],"learning":[14],"tasks.":[15],"However,":[16],"only":[17],"about":[18],"half":[19],"of":[20,115,121],"maximum":[22],"bandwidth":[23,71,114],"can":[24],"be":[25,163],"achieved":[26],"in":[27],"optimal":[29],"condition.":[30],"In":[31,52,130,168],"recent":[32],"years,":[33],"several":[34],"in-network":[35],"aggregation":[36,66],"frameworks":[37],"have":[38,49],"been":[39,50],"proposed":[40,135],"overcome":[42],"drawback,":[44],"but":[45],"limited":[46],"hardware":[47,136],"information":[48],"disclosed.":[51],"this":[53],"paper,":[54],"we":[55],"propose":[56],"a":[57,78,119],"scalable":[58],"fully-pipelined":[59],"architecture":[60,74,137],"that":[61,82],"handles":[62],"tasks":[63],"like":[64],"forwarding,":[65],"and":[67,93,179,189],"retransmission":[68],"with":[69,88,107,118,177],"no":[70],"loss.":[72],"implemented":[76],"on":[77],"Xilinx":[79],"Ultrascale":[80],"FPGA":[81],"connects":[83],"8":[85],"working":[86],"servers":[87],"10":[89],"Gb/s":[90],"network":[91],"adapters,":[92],"it":[94],"able":[96],"scale":[98],"more":[100,104],"complicated":[101],"scenarios":[102],"involving":[103],"workers.":[105],"Compared":[106],"Ring-AllReduce,":[108],"using":[109],"AllReduce-Switch":[110,172],"improves":[111,173],"efficient":[113],"AllReduce":[116],"communication":[117,161],"ratio":[120],"<inline-formula":[122,143,182,190],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[123,144,183,191],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[124,145,184,192],"<tex-math":[125,146,185,193],"notation=\"LaTeX\">$1.75\\times":[126],"$":[127,148,187,195],"</tex-math></inline-formula>":[128,149,188,196],".":[129],"image":[131],"training":[132,153,175],"tasks,":[133],"helps":[138],"achieve":[140],"up":[141],"notation=\"LaTeX\">$1.67\\times":[147],"speedup":[150,159],"process.":[154],"For":[155],"computing-intensive":[156],"models,":[157],"from":[160],"may":[162],"partially":[164],"hidden":[165],"by":[166,181],"computing.":[167],"particular,":[169],"for":[170],"ResNet-50,":[171],"process":[176],"MPI":[178],"NCCL":[180],"notation=\"LaTeX\">$1.30\\times":[186],"notation=\"LaTeX\">$1.04\\times":[194],"respectively.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
