{"id":"https://openalex.org/W4296591840","doi":"https://doi.org/10.1145/3523227.3547405","title":"Merlin HugeCTR: GPU-accelerated Recommender System Training and Inference","display_name":"Merlin HugeCTR: GPU-accelerated Recommender System Training and Inference","publication_year":2022,"publication_date":"2022-09-13","ids":{"openalex":"https://openalex.org/W4296591840","doi":"https://doi.org/10.1145/3523227.3547405"},"language":"en","primary_location":{"id":"doi:10.1145/3523227.3547405","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3523227.3547405","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2210.08803","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020576598","display_name":"Zehuan Wang","orcid":"https://orcid.org/0000-0002-1072-2651"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zehuan Wang","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056170667","display_name":"Yingcan Wei","orcid":"https://orcid.org/0000-0002-5093-7382"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yingcan Wei","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031393194","display_name":"Minseok Lee","orcid":"https://orcid.org/0000-0002-8367-1939"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minseok Lee","raw_affiliation_strings":["NVIDIA, Korea, Republic of"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Korea, Republic of","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102004952","display_name":"Matthias Langer","orcid":"https://orcid.org/0000-0003-1776-8000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthias Langer","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100616402","display_name":"Fan Yu","orcid":"https://orcid.org/0000-0002-4978-0023"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan Yu","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100454211","display_name":"Jie Liu","orcid":"https://orcid.org/0000-0003-3745-7541"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie Liu","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370044","display_name":"Shijie Liu","orcid":"https://orcid.org/0009-0003-1935-1783"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shijie Liu","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083837704","display_name":"Daniel G. Abel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daniel G. Abel","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104089978","display_name":"Xu Guo","orcid":"https://orcid.org/0009-0004-1258-8232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu Guo","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021111776","display_name":"Jianbing Dong","orcid":"https://orcid.org/0000-0002-1910-6417"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianbing Dong","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041948343","display_name":"Ji Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji Shi","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101543891","display_name":"Kunlun Li","orcid":"https://orcid.org/0000-0002-5797-6560"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kunlun Li","raw_affiliation_strings":["NVIDIA, China"],"affiliations":[{"raw_affiliation_string":"NVIDIA, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5020576598"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6511,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.91419717,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"534","last_page":"537"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8813153505325317},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7653555274009705},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6802091598510742},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6268354058265686},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.48366793990135193},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.47896459698677063},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.44852960109710693},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4443019926548004},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3471473455429077},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3426166772842407}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8813153505325317},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7653555274009705},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6802091598510742},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6268354058265686},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.48366793990135193},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.47896459698677063},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.44852960109710693},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4443019926548004},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3471473455429077},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3426166772842407},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3523227.3547405","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3523227.3547405","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2210.08803","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.08803","pdf_url":"https://arxiv.org/pdf/2210.08803","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2210.08803","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.08803","pdf_url":"https://arxiv.org/pdf/2210.08803","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2339765813","https://openalex.org/W2475334473","https://openalex.org/W2589642470","https://openalex.org/W2792078035","https://openalex.org/W2947737663","https://openalex.org/W2953384591","https://openalex.org/W2964108773","https://openalex.org/W2964182926","https://openalex.org/W2972087877","https://openalex.org/W3010969086","https://openalex.org/W3036703963","https://openalex.org/W3043433718","https://openalex.org/W3125012172","https://openalex.org/W3152501898","https://openalex.org/W3167625290","https://openalex.org/W4240329257","https://openalex.org/W4289401659","https://openalex.org/W4295312788"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2027972911","https://openalex.org/W2097707447","https://openalex.org/W3128807919","https://openalex.org/W3176411177"],"abstract_inverted_index":{"In":[0,39,65],"this":[1,145],"talk,":[2],"we":[3],"introduce":[4],"Merlin":[5,7,41,73,97,114,147],"HugeCTR.":[6],"HugeCTR":[8,42,74,98,115,148],"is":[9],"an":[10,50],"open":[11],"source,":[12],"GPU-accelerated":[13],"integration":[14],"framework":[15],"for":[16,60,139,158],"click-through":[17],"rate":[18],"estimation.":[19],"It":[20],"optimizes":[21],"both":[22],"training":[23,29,71,108],"and":[24,35,123,166],"inference,":[25],"whilst":[26],"enabling":[27],"model":[28,62,70,142],"at":[30],"scale":[31],"with":[32,49],"model-parallel":[33],"embeddings":[34,59],"data-parallel":[36],"neural":[37],"networks.":[38],"particular,":[40],"combines":[43],"a":[44,76,83,118,152],"high-performance":[45],"GPU":[46],"embedding":[47],"cache":[48],"hierarchical":[51,119],"storage":[52],"architecture,":[53],"to":[54,80,106,132],"realize":[55],"low-latency":[56],"retrieval":[57],"of":[58,78,103,137],"online":[61],"inference":[63,171],"tasks.":[64],"the":[66,127,134],"MLPerf":[67],"v1.0":[68],"DLRM":[69],"benchmark,":[72],"achieves":[75],"speedup":[77,154],"up":[79],"24.6x":[81],"on":[82,91],"single":[84],"DGX":[85],"A100":[86],"(8x":[87],"A100)":[88],"over":[89,162],"PyTorch":[90],"4x4-socket":[92],"CPU":[93,163],"nodes":[94],"(4x4x28":[95],"cores).":[96],"can":[99,150],"also":[100],"take":[101],"advantage":[102],"multi-node":[104],"environments":[105],"accelerate":[107],"even":[109],"further.":[110],"Since":[111],"late":[112],"2021,":[113],"additionally":[116],"features":[117],"parameter":[120],"server":[121,130],"(HPS)":[122],"supports":[124],"deployment":[125],"via":[126],"NVIDIA":[128],"Triton":[129],"framework,":[131],"leverage":[133],"computational":[135],"capabilities":[136],"GPUs":[138],"high-speed":[140],"recommendation":[141,160],"inference.":[143],"Using":[144],"HPS,":[146],"users":[149],"achieve":[151],"5~62x":[153],"(batch":[155],"size":[156],"dependent)":[157],"popular":[159],"models":[161],"baseline":[164],"implementations,":[165],"dramatically":[167],"reduce":[168],"their":[169],"end-to-end":[170],"latency.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2022-09-22T00:00:00"}
