{"id":"https://openalex.org/W3158146252","doi":"https://doi.org/10.1109/hpca51647.2021.00072","title":"Understanding Training Efficiency of Deep Learning Recommendation Models at Scale","display_name":"Understanding Training Efficiency of Deep Learning Recommendation Models at Scale","publication_year":2021,"publication_date":"2021-02-01","ids":{"openalex":"https://openalex.org/W3158146252","doi":"https://doi.org/10.1109/hpca51647.2021.00072","mag":"3158146252"},"language":"en","primary_location":{"id":"doi:10.1109/hpca51647.2021.00072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca51647.2021.00072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005070359","display_name":"Bilge Acun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bilge Acun","raw_affiliation_strings":["Facebook AI Research, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084758931","display_name":"M. Murphy","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Murphy","raw_affiliation_strings":["Facebook, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Facebook, Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100382667","display_name":"Xiaodong Wang","orcid":"https://orcid.org/0000-0002-4533-6734"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaodong Wang","raw_affiliation_strings":["Facebook, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Facebook, Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004002767","display_name":"Jade Nie","orcid":"https://orcid.org/0009-0008-1664-2805"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jade Nie","raw_affiliation_strings":["Facebook, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Facebook, Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028220093","display_name":"Carole-Jean Wu","orcid":"https://orcid.org/0000-0002-9032-7239"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]},{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carole-Jean Wu","raw_affiliation_strings":["Facebook AI Research, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046756907","display_name":"Kim Hazelwood","orcid":"https://orcid.org/0000-0002-2713-8507"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kim Hazelwood","raw_affiliation_strings":["Facebook AI Research, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research, Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444","https://openalex.org/I4210099336"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5005070359"],"corresponding_institution_ids":["https://openalex.org/I4210099336","https://openalex.org/I4210114444"],"apc_list":null,"apc_paid":null,"fwci":8.8028,"has_fulltext":false,"cited_by_count":78,"citation_normalized_percentile":{"value":0.98161824,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"802","last_page":"814"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8289052844047546},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.705334484577179},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6148107647895813},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.597786545753479},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5513196587562561},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5416882038116455},{"id":"https://openalex.org/keywords/mainstream","display_name":"Mainstream","score":0.5005543231964111},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4408155083656311},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.42667219042778015},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.41888168454170227},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37536364793777466},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14657056331634521}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8289052844047546},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.705334484577179},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6148107647895813},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.597786545753479},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5513196587562561},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5416882038116455},{"id":"https://openalex.org/C2777617010","wikidata":"https://www.wikidata.org/wiki/Q18957","display_name":"Mainstream","level":2,"score":0.5005543231964111},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4408155083656311},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.42667219042778015},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.41888168454170227},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37536364793777466},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14657056331634521},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca51647.2021.00072","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca51647.2021.00072","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":80,"referenced_works":["https://openalex.org/W1982063824","https://openalex.org/W2002555321","https://openalex.org/W2084660366","https://openalex.org/W2114079787","https://openalex.org/W2119528150","https://openalex.org/W2138243089","https://openalex.org/W2168231600","https://openalex.org/W2239144794","https://openalex.org/W2475334473","https://openalex.org/W2512971201","https://openalex.org/W2553581924","https://openalex.org/W2593864460","https://openalex.org/W2617960902","https://openalex.org/W2622263826","https://openalex.org/W2702150584","https://openalex.org/W2723293840","https://openalex.org/W2749733699","https://openalex.org/W2769856846","https://openalex.org/W2794670651","https://openalex.org/W2800462658","https://openalex.org/W2889326796","https://openalex.org/W2891896107","https://openalex.org/W2900810680","https://openalex.org/W2933138175","https://openalex.org/W2947737663","https://openalex.org/W2949364482","https://openalex.org/W2962745591","https://openalex.org/W2962863496","https://openalex.org/W2963150697","https://openalex.org/W2963228337","https://openalex.org/W2963351448","https://openalex.org/W2963712608","https://openalex.org/W2963804082","https://openalex.org/W2967733054","https://openalex.org/W2975367729","https://openalex.org/W2979245724","https://openalex.org/W2979719709","https://openalex.org/W2999012726","https://openalex.org/W3008591352","https://openalex.org/W3010969086","https://openalex.org/W3011515423","https://openalex.org/W3012897490","https://openalex.org/W3016842236","https://openalex.org/W3021124033","https://openalex.org/W3041360407","https://openalex.org/W3043023836","https://openalex.org/W3043433718","https://openalex.org/W3043571714","https://openalex.org/W3095776306","https://openalex.org/W3095994716","https://openalex.org/W3104506006","https://openalex.org/W3109610142","https://openalex.org/W3158783012","https://openalex.org/W4298840580","https://openalex.org/W4299585995","https://openalex.org/W6638803421","https://openalex.org/W6680402377","https://openalex.org/W6684859321","https://openalex.org/W6730009442","https://openalex.org/W6734897383","https://openalex.org/W6738250615","https://openalex.org/W6739622702","https://openalex.org/W6740154879","https://openalex.org/W6740239475","https://openalex.org/W6745458143","https://openalex.org/W6746514494","https://openalex.org/W6750194011","https://openalex.org/W6755257872","https://openalex.org/W6756363772","https://openalex.org/W6756944905","https://openalex.org/W6763737044","https://openalex.org/W6767799710","https://openalex.org/W6769062451","https://openalex.org/W6769454693","https://openalex.org/W6774806506","https://openalex.org/W6774821467","https://openalex.org/W6776283548","https://openalex.org/W6783531377","https://openalex.org/W6784348005","https://openalex.org/W6785257627"],"related_works":["https://openalex.org/W2377237701","https://openalex.org/W2360099860","https://openalex.org/W1583826057","https://openalex.org/W2352463596","https://openalex.org/W2380850119","https://openalex.org/W2101450440","https://openalex.org/W2383675217","https://openalex.org/W4323893170","https://openalex.org/W2376151201","https://openalex.org/W2393898889"],"abstract_inverted_index":{"The":[0,94],"use":[1,40],"of":[2,32,41,59,96,104],"GPUs":[3,42,106],"has":[4],"proliferated":[5],"for":[6,15,107],"machine":[7],"learning":[8,18],"workflows":[9],"and":[10,52,57,73,117],"is":[11,99],"now":[12],"considered":[13],"mainstream":[14],"many":[16],"deep":[17],"models.":[19],"Meanwhile,":[20],"when":[21],"training":[22,108],"state-of-the-art":[23],"personal":[24],"recommendation":[25,61,109],"models,":[26,110],"which":[27],"consume":[28],"the":[29,39,102],"highest":[30],"number":[31],"compute":[33],"cycles":[34],"at":[35,115],"our":[36],"large-scale":[37],"datacenters,":[38],"came":[43],"with":[44],"various":[45],"challenges":[46],"due":[47],"to":[48,100],"having":[49],"both":[50],"compute-intensive":[51],"memory-intensive":[53],"components.":[54],"GPU":[55,92,123],"performance":[56],"efficiency":[58,114],"these":[60,79],"models":[62,80],"are":[63],"largely":[64],"affected":[65],"by":[66],"model":[67],"architecture":[68],"configurations":[69],"such":[70],"as":[71],"dense":[72],"sparse":[74],"features,":[75],"MLP":[76],"dimensions.":[77],"Furthermore,":[78],"often":[81],"contain":[82],"large":[83],"embedding":[84],"tables":[85],"that":[86],"do":[87],"not":[88],"fit":[89],"into":[90],"limited":[91],"memory.":[93],"goal":[95],"this":[97],"paper":[98],"explain":[101],"intricacies":[103],"using":[105],"factors":[111],"affecting":[112],"hardware":[113],"scale,":[116],"learnings":[118],"from":[119],"a":[120],"new":[121],"scale-up":[122],"server":[124],"design,":[125],"Zion.":[126]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":24},{"year":2023,"cited_by_count":16},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":11}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
