{"id":"https://openalex.org/W4381327166","doi":"https://doi.org/10.1145/3577193.3593724","title":"Software-Hardware Co-design of Heterogeneous SmartNIC System for Recommendation Models Inference and Training","display_name":"Software-Hardware Co-design of Heterogeneous SmartNIC System for Recommendation Models Inference and Training","publication_year":2023,"publication_date":"2023-06-20","ids":{"openalex":"https://openalex.org/W4381327166","doi":"https://doi.org/10.1145/3577193.3593724"},"language":"en","primary_location":{"id":"doi:10.1145/3577193.3593724","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3577193.3593724","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085745642","display_name":"Anqi Guo","orcid":"https://orcid.org/0000-0001-5872-4464"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]},{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anqi Guo","raw_affiliation_strings":["Boston University, Boston, USA","University of Rochester, Rochester, USA","Boston University, Boston, USA University of Rochester, Rochester, USA"],"raw_orcid":"https://orcid.org/0000-0001-5872-4464","affiliations":[{"raw_affiliation_string":"Boston University, Boston, USA","institution_ids":[]},{"raw_affiliation_string":"University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228"]},{"raw_affiliation_string":"Boston University, Boston, USA University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228","https://openalex.org/I111088046"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090104788","display_name":"Yuchen Hao","orcid":"https://orcid.org/0009-0005-8513-9566"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuchen Hao","raw_affiliation_strings":["Meta Platforms, San Francisco, USA"],"raw_orcid":"https://orcid.org/0009-0005-8513-9566","affiliations":[{"raw_affiliation_string":"Meta Platforms, San Francisco, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009432979","display_name":"Chunshu Wu","orcid":"https://orcid.org/0009-0006-2039-0853"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chunshu Wu","raw_affiliation_strings":["Boston University, Boston, USA"],"raw_orcid":"https://orcid.org/0009-0006-2039-0853","affiliations":[{"raw_affiliation_string":"Boston University, Boston, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067209238","display_name":"Pouya Haghi","orcid":"https://orcid.org/0000-0003-2893-9194"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pouya Haghi","raw_affiliation_strings":["Boston University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0003-2893-9194","affiliations":[{"raw_affiliation_string":"Boston University, Boston, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102017711","display_name":"Zhenyu Pan","orcid":"https://orcid.org/0009-0009-2805-6018"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhenyu Pan","raw_affiliation_strings":["University of Rochester, Rochester, USA"],"raw_orcid":"https://orcid.org/0009-0009-2805-6018","affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, USA","institution_ids":["https://openalex.org/I5388228"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058015880","display_name":"Min Si","orcid":"https://orcid.org/0000-0002-0208-096X"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Min Si","raw_affiliation_strings":["Meta Platforms, San Francisco, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-0208-096X","affiliations":[{"raw_affiliation_string":"Meta Platforms, San Francisco, United States of America","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063703614","display_name":"Dingwen Tao","orcid":"https://orcid.org/0000-0001-5422-4497"},"institutions":[{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dingwen Tao","raw_affiliation_strings":["Indiana University, Bloomington, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-5422-4497","affiliations":[{"raw_affiliation_string":"Indiana University, Bloomington, United States of America","institution_ids":["https://openalex.org/I4210119109"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413657","display_name":"Ang Li","orcid":"https://orcid.org/0000-0003-3734-9137"},"institutions":[{"id":"https://openalex.org/I142606810","display_name":"Pacific Northwest National Laboratory","ror":"https://ror.org/05h992307","country_code":"US","type":"facility","lineage":["https://openalex.org/I1325736334","https://openalex.org/I1330989302","https://openalex.org/I142606810","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ang Li","raw_affiliation_strings":["Pacific Northwest National Laboratory, Richland, United States of America"],"raw_orcid":"https://orcid.org/0000-0003-3734-9137","affiliations":[{"raw_affiliation_string":"Pacific Northwest National Laboratory, Richland, United States of America","institution_ids":["https://openalex.org/I142606810"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021051610","display_name":"Martin Herbordt","orcid":"https://orcid.org/0000-0002-3443-9113"},"institutions":[{"id":"https://openalex.org/I111088046","display_name":"Boston University","ror":"https://ror.org/05qwgg493","country_code":"US","type":"education","lineage":["https://openalex.org/I111088046"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Martin Herbordt","raw_affiliation_strings":["Boston University, Boston, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-3443-9113","affiliations":[{"raw_affiliation_string":"Boston University, Boston, United States of America","institution_ids":["https://openalex.org/I111088046"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078443672","display_name":"Tong Geng","orcid":"https://orcid.org/0000-0002-3644-2922"},"institutions":[{"id":"https://openalex.org/I5388228","display_name":"University of Rochester","ror":"https://ror.org/022kthw22","country_code":"US","type":"education","lineage":["https://openalex.org/I5388228"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tong Geng","raw_affiliation_strings":["University of Rochester, Rochester, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-3644-2922","affiliations":[{"raw_affiliation_string":"University of Rochester, Rochester, United States of America","institution_ids":["https://openalex.org/I5388228"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":9.4897,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.97950366,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"336","last_page":"347"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8402920961380005},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8349853754043579},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8040031790733337},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7576714754104614},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.5133349299430847},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5026774406433105},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4987940788269043},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44976574182510376},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4257926940917969},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.42273572087287903},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41525357961654663},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3479400873184204},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.25471800565719604},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.1847340166568756},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.16144037246704102}],"concepts":[{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8402920961380005},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8349853754043579},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8040031790733337},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7576714754104614},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.5133349299430847},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5026774406433105},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4987940788269043},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44976574182510376},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4257926940917969},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.42273572087287903},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41525357961654663},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3479400873184204},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25471800565719604},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.1847340166568756},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.16144037246704102},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3577193.3593724","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3577193.3593724","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 37th International Conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2155299811","https://openalex.org/W2210543184","https://openalex.org/W2271840356","https://openalex.org/W2512971201","https://openalex.org/W2614794251","https://openalex.org/W2755567635","https://openalex.org/W2774000609","https://openalex.org/W2789197829","https://openalex.org/W2794670651","https://openalex.org/W2900810680","https://openalex.org/W2947737663","https://openalex.org/W2949017472","https://openalex.org/W2972269283","https://openalex.org/W2988347281","https://openalex.org/W2996428491","https://openalex.org/W3010969086","https://openalex.org/W3012897490","https://openalex.org/W3084025671","https://openalex.org/W3093739590","https://openalex.org/W3100127252","https://openalex.org/W3131663603","https://openalex.org/W3138787737","https://openalex.org/W3155243801","https://openalex.org/W3188766293","https://openalex.org/W3197720002","https://openalex.org/W4200482086","https://openalex.org/W4200623295","https://openalex.org/W4210258659","https://openalex.org/W4214658871","https://openalex.org/W4239385313","https://openalex.org/W4281634017","https://openalex.org/W4281762513","https://openalex.org/W4289276774","https://openalex.org/W4299444095","https://openalex.org/W4301239768","https://openalex.org/W4321637359"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2127970246","https://openalex.org/W2084856301","https://openalex.org/W1001352512","https://openalex.org/W4382618745","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W2748922771","https://openalex.org/W1987128138"],"abstract_inverted_index":{"Deep":[0],"Learning":[1],"Recommendation":[2],"Models":[3],"(DLRMs)":[4],"are":[5],"important":[6,21],"applications":[7],"in":[8],"various":[9],"domains":[10],"and":[11,19,51],"have":[12],"evolved":[13],"into":[14],"one":[15],"of":[16,28,38],"the":[17,32,58],"largest":[18],"most":[20],"machine":[22],"learning":[23],"applications.":[24],"With":[25],"their":[26],"trillions":[27],"parameters":[29],"necessarily":[30],"exceeding":[31],"high":[33],"bandwidth":[34],"memory":[35],"(HBM)":[36],"capacity":[37],"GPUs,":[39],"ever":[40],"more":[41],"massive":[42],"DLRMs":[43],"require":[44],"large-scale":[45],"multi-node":[46],"systems":[47],"for":[48],"distributed":[49],"training":[50],"inference.":[52],"However,":[53],"these":[54],"all":[55],"suffer":[56],"from":[57],"all-to-all":[59],"communication":[60],"bottleneck,":[61],"which":[62],"limits":[63],"scalability.":[64]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":17}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
