{"id":"https://openalex.org/W3043433718","doi":"https://doi.org/10.1109/isca45697.2020.00084","title":"DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference","display_name":"DeepRecSys: A System for Optimizing End-To-End At-Scale Neural Recommendation Inference","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W3043433718","doi":"https://doi.org/10.1109/isca45697.2020.00084","mag":"3043433718"},"language":"en","primary_location":{"id":"doi:10.1109/isca45697.2020.00084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca45697.2020.00084","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035704218","display_name":"Udit Gupta","orcid":"https://orcid.org/0000-0002-9118-0961"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Udit Gupta","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047381366","display_name":"Samuel Hsia","orcid":null},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Hsia","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080311953","display_name":"Vikram Saraph","orcid":"https://orcid.org/0000-0002-3453-5797"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Vikram Saraph","raw_affiliation_strings":["Facebook Inc"],"affiliations":[{"raw_affiliation_string":"Facebook Inc","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100382658","display_name":"Xiaodong Wang","orcid":"https://orcid.org/0000-0002-2945-9240"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Xiaodong Wang","raw_affiliation_strings":["Facebook Inc"],"affiliations":[{"raw_affiliation_string":"Facebook Inc","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089173037","display_name":"Brandon Reagen","orcid":"https://orcid.org/0000-0002-1932-2750"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Brandon Reagen","raw_affiliation_strings":["Facebook Inc"],"affiliations":[{"raw_affiliation_string":"Facebook Inc","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043327132","display_name":"Gu-Yeon Wei","orcid":"https://orcid.org/0000-0001-5730-9904"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gu-Yeon Wei","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072539515","display_name":"Hsien-Hsin S. Lee","orcid":"https://orcid.org/0000-0002-8926-8243"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Hsien-Hsin S. Lee","raw_affiliation_strings":["Facebook Inc"],"affiliations":[{"raw_affiliation_string":"Facebook Inc","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026496503","display_name":"David Brooks","orcid":"https://orcid.org/0000-0002-0662-7889"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Brooks","raw_affiliation_strings":["Harvard University"],"affiliations":[{"raw_affiliation_string":"Harvard University","institution_ids":["https://openalex.org/I2801851002"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028220093","display_name":"Carole-Jean Wu","orcid":"https://orcid.org/0000-0002-9032-7239"},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Carole-Jean Wu","raw_affiliation_strings":["Facebook Inc"],"affiliations":[{"raw_affiliation_string":"Facebook Inc","institution_ids":["https://openalex.org/I2252078561"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5035704218"],"corresponding_institution_ids":["https://openalex.org/I2801851002"],"apc_list":null,"apc_paid":null,"fwci":13.51107846,"has_fulltext":false,"cited_by_count":140,"citation_normalized_percentile":{"value":0.98900916,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"982","last_page":"995"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8333847522735596},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7439612150192261},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6473174095153809},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6378264427185059},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.5073143839836121},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4881473481655121},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.4510927200317383},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.33722227811813354},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3225141167640686},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3065398335456848},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28397995233535767},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11563250422477722}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8333847522735596},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7439612150192261},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6473174095153809},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6378264427185059},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.5073143839836121},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4881473481655121},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4510927200317383},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.33722227811813354},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3225141167640686},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3065398335456848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28397995233535767},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11563250422477722},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/isca45697.2020.00084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isca45697.2020.00084","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.6399999856948853,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":84,"referenced_works":["https://openalex.org/W1979717209","https://openalex.org/W1997025365","https://openalex.org/W2000967104","https://openalex.org/W2042281163","https://openalex.org/W2048266589","https://openalex.org/W2051265981","https://openalex.org/W2053794660","https://openalex.org/W2054141820","https://openalex.org/W2062832101","https://openalex.org/W2063186542","https://openalex.org/W2067523571","https://openalex.org/W2098274770","https://openalex.org/W2098505406","https://openalex.org/W2099517310","https://openalex.org/W2100415730","https://openalex.org/W2100741459","https://openalex.org/W2142801765","https://openalex.org/W2143419558","https://openalex.org/W2152839228","https://openalex.org/W2194775991","https://openalex.org/W2261536765","https://openalex.org/W2261808795","https://openalex.org/W2285660444","https://openalex.org/W2289252105","https://openalex.org/W2409247164","https://openalex.org/W2475334473","https://openalex.org/W2489529491","https://openalex.org/W2508602506","https://openalex.org/W2511743527","https://openalex.org/W2513554817","https://openalex.org/W2515080096","https://openalex.org/W2518281301","https://openalex.org/W2518511512","https://openalex.org/W2525778437","https://openalex.org/W2528784626","https://openalex.org/W2528800812","https://openalex.org/W2541839172","https://openalex.org/W2565851976","https://openalex.org/W2605347906","https://openalex.org/W2605350416","https://openalex.org/W2606722458","https://openalex.org/W2625457103","https://openalex.org/W2723293840","https://openalex.org/W2770159745","https://openalex.org/W2771571505","https://openalex.org/W2794670651","https://openalex.org/W2903754802","https://openalex.org/W2931122162","https://openalex.org/W2947737663","https://openalex.org/W2962745591","https://openalex.org/W2962914733","https://openalex.org/W2963673357","https://openalex.org/W2964174152","https://openalex.org/W2964330541","https://openalex.org/W2972269283","https://openalex.org/W2973172293","https://openalex.org/W2975367729","https://openalex.org/W2979245724","https://openalex.org/W2979310060","https://openalex.org/W2979313476","https://openalex.org/W2979719709","https://openalex.org/W2984140583","https://openalex.org/W3011515423","https://openalex.org/W3016842236","https://openalex.org/W3016939927","https://openalex.org/W3024621361","https://openalex.org/W3043571714","https://openalex.org/W3105753409","https://openalex.org/W3125484574","https://openalex.org/W3141650078","https://openalex.org/W4233798301","https://openalex.org/W4234863022","https://openalex.org/W4239722617","https://openalex.org/W4240168186","https://openalex.org/W4246587277","https://openalex.org/W4247470470","https://openalex.org/W6687483927","https://openalex.org/W6763737044","https://openalex.org/W6766057927","https://openalex.org/W6767602856","https://openalex.org/W6767617057","https://openalex.org/W6767799710","https://openalex.org/W6769062451","https://openalex.org/W6769454693"],"related_works":["https://openalex.org/W4390273403","https://openalex.org/W4386781444","https://openalex.org/W2150182025","https://openalex.org/W3092950680","https://openalex.org/W3197542405","https://openalex.org/W2056712470","https://openalex.org/W3125580266","https://openalex.org/W4288390103","https://openalex.org/W3128807919","https://openalex.org/W3176411177"],"abstract_inverted_index":{"Neural":[0],"personalized":[1],"recommendation":[2,28,42,110,136,148,170,184],"is":[3],"the":[4,24,117,142],"cornerstone":[5],"of":[6,10,19,27,54,109,121,135,146,169],"a":[7,41,106,167,189],"wide":[8,107],"collection":[9],"cloud":[11,20],"services":[12],"and":[13,58,63,91,119,131,156,183,197],"products,":[14],"constituting":[15],"significant":[16],"compute":[17],"demand":[18],"infrastructure.":[21],"Thus,":[22],"improving":[23],"execution":[25],"efficiency":[26,133],"directly":[29],"translates":[30],"into":[31,51,174],"infrastructure":[32],"capacity":[33],"saving.":[34],"In":[35,138],"this":[36,93,152],"paper,":[37,153],"we":[38,89,154],"propose":[39],"DeepRecSched,":[40],"inference":[43,55],"scheduler":[44],"that":[45],"maximizes":[46],"latency-bounded":[47],"throughput":[48,77],"by":[49,82,112],"taking":[50,173],"account":[52,175],"characteristics":[53],"query":[56,180,185],"size":[57],"arrival":[59,181],"patterns,":[60],"model":[61],"architectures,":[62],"underlying":[64],"hardware":[65,124],"systems.":[66],"By":[67],"carefully":[68],"optimizing":[69,126],"task":[70],"versus":[71],"data-level":[72],"parallelism,":[73],"DeepRecSched":[74,115],"improves":[75],"system":[76,127],"on":[78],"server":[79],"class":[80],"CPUs":[81],"2\u00d7":[83],"across":[84,105],"eight":[85],"industry-representative":[86,195],"models.":[87],"Next,":[88],"deploy":[90],"evaluate":[92],"optimization":[94],"in":[95,125,151],"an":[96,158],"at-scale":[97,176],"production":[98,190],"datacenter":[99],"which":[100],"reduces":[101],"end-to-end":[102,159],"tail":[103,198],"latency":[104,199],"variety":[108,168],"models":[111,196],"30%.":[113],"Finally,":[114],"demonstrates":[116],"role":[118],"impact":[120],"specialized":[122],"AI":[123],"level":[128],"performance":[129],"(QPS)":[130],"power":[132],"(QPS/watt)":[134],"inference.":[137],"order":[139],"to":[140],"enable":[141],"design":[143,155],"space":[144],"exploration":[145],"customized":[147],"systems":[149],"shown":[150],"validate":[157],"modeling":[160],"infrastructure,":[161],"DeepRecInfra.":[162],"DeepRecInfra":[163],"enables":[164],"studies":[165],"over":[166],"use":[171],"cases,":[172],"effects,":[177],"such":[178],"as":[179,192,194],"patterns":[182],"sizes,":[186],"observed":[187],"from":[188],"datacenter,":[191],"well":[193],"targets.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":30},{"year":2023,"cited_by_count":33},{"year":2022,"cited_by_count":25},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":5}],"updated_date":"2026-02-20T08:17:22.645390","created_date":"2025-10-10T00:00:00"}
