{"id":"https://openalex.org/W4281634017","doi":"https://doi.org/10.1145/3470496.3527386","title":"Training personalized recommendation systems from (GPU) scratch","display_name":"Training personalized recommendation systems from (GPU) scratch","publication_year":2022,"publication_date":"2022-05-31","ids":{"openalex":"https://openalex.org/W4281634017","doi":"https://doi.org/10.1145/3470496.3527386"},"language":"en","primary_location":{"id":"doi:10.1145/3470496.3527386","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3470496.3527386","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037670571","display_name":"Youngeun Kwon","orcid":"https://orcid.org/0000-0002-4020-8995"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]},{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]}],"countries":["CA","KR"],"is_corresponding":true,"raw_author_name":"Youngeun Kwon","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091648103","display_name":"Minsoo Rhu","orcid":"https://orcid.org/0000-0003-3303-8681"},"institutions":[{"id":"https://openalex.org/I157485424","display_name":"Korea Advanced Institute of Science and Technology","ror":"https://ror.org/05apxxy63","country_code":"KR","type":"education","lineage":["https://openalex.org/I157485424"]},{"id":"https://openalex.org/I4210099236","display_name":"Kootenay Association for Science & Technology","ror":"https://ror.org/011pv9p44","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210099236"]}],"countries":["CA","KR"],"is_corresponding":false,"raw_author_name":"Minsoo Rhu","raw_affiliation_strings":["KAIST"],"affiliations":[{"raw_affiliation_string":"KAIST","institution_ids":["https://openalex.org/I4210099236","https://openalex.org/I157485424"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5037670571"],"corresponding_institution_ids":["https://openalex.org/I157485424","https://openalex.org/I4210099236"],"apc_list":null,"apc_paid":null,"fwci":6.6876,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.96815708,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"860","last_page":"873"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8350151777267456},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7574914693832397},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7059342861175537},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6099516749382019},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.46583786606788635},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.4259337782859802},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.42478060722351074},{"id":"https://openalex.org/keywords/central-processing-unit","display_name":"Central processing unit","score":0.412203848361969},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4109938144683838},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.34057706594467163},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2644462585449219},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.24282512068748474}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8350151777267456},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7574914693832397},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7059342861175537},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6099516749382019},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.46583786606788635},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.4259337782859802},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.42478060722351074},{"id":"https://openalex.org/C49154492","wikidata":"https://www.wikidata.org/wiki/Q5300","display_name":"Central processing unit","level":2,"score":0.412203848361969},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4109938144683838},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.34057706594467163},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2644462585449219},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.24282512068748474},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3470496.3527386","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3470496.3527386","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 49th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2048266589","https://openalex.org/W2152839228","https://openalex.org/W2442974303","https://openalex.org/W2489529491","https://openalex.org/W2512971201","https://openalex.org/W2515287984","https://openalex.org/W2516141709","https://openalex.org/W2518511512","https://openalex.org/W2529865518","https://openalex.org/W2606722458","https://openalex.org/W2790925711","https://openalex.org/W2796360442","https://openalex.org/W2903754802","https://openalex.org/W2962958870","https://openalex.org/W2969388332","https://openalex.org/W2979719709","https://openalex.org/W2984020950","https://openalex.org/W3012479151","https://openalex.org/W3012514909","https://openalex.org/W3016842236","https://openalex.org/W3042495273","https://openalex.org/W3043023836","https://openalex.org/W3043433718","https://openalex.org/W3113181213","https://openalex.org/W3152501898","https://openalex.org/W3153887672","https://openalex.org/W3155243801","https://openalex.org/W3157531038","https://openalex.org/W3157864729","https://openalex.org/W3158702178","https://openalex.org/W3158783012","https://openalex.org/W3187188899","https://openalex.org/W3197720002","https://openalex.org/W3205803342","https://openalex.org/W3207399097","https://openalex.org/W4247198796","https://openalex.org/W4301361180","https://openalex.org/W6600336938","https://openalex.org/W6811370846"],"related_works":["https://openalex.org/W3000570965","https://openalex.org/W2133682266","https://openalex.org/W2497617944","https://openalex.org/W2167303720","https://openalex.org/W1563139915","https://openalex.org/W2109715593","https://openalex.org/W2061075966","https://openalex.org/W3147501184","https://openalex.org/W2268996566","https://openalex.org/W4256652509"],"abstract_inverted_index":{"Personalized":[0],"recommendation":[1],"models":[2],"(RecSys)":[3],"are":[4],"one":[5],"of":[6,19,30,34,47,146,177],"the":[7,39,45,60,65,84,107,158,162,173],"most":[8],"popular":[9],"machine":[10],"learning":[11],"workload":[12],"serviced":[13],"by":[14],"hyperscalers.":[15],"A":[16],"critical":[17],"challenge":[18],"training":[20,71,148,192],"RecSys":[21,147],"is":[22,80],"its":[23],"high":[24],"memory":[25,48,63,66,75,101,198],"capacity":[26],"requirements,":[27],"reaching":[28],"hundreds":[29],"GBs":[31],"to":[32,58,94,104,111,149,170,193],"TBs":[33],"model":[35],"size.":[36],"In":[37,124],"RecSys,":[38],"so-called":[40],"embedding":[41,68,108,135,152,178,190],"layers":[42,179],"account":[43],"for":[44,137],"majority":[46],"usage":[49],"so":[50],"current":[51],"systems":[52],"employ":[53],"a":[54,129],"hybrid":[55],"CPU-GPU":[56],"design":[57],"have":[59],"large":[61],"CPU":[62,86,112],"store":[64],"hungry":[67],"layers.":[69],"Unfortunately,":[70],"embeddings":[72,98],"involve":[73],"several":[74,118],"bandwidth":[76],"intensive":[77],"operations":[78],"which":[79],"at":[81,196],"odds":[82],"with":[83,120],"slow":[85],"memory,":[87,113],"causing":[88],"performance":[89],"overheads.":[90],"Prior":[91],"work":[92],"proposed":[93,140,186],"cache":[95,122,153,164,187],"frequently":[96],"accessed":[97],"inside":[99,184],"GPU":[100,197],"as":[102],"means":[103],"filter":[105],"down":[106],"layer":[109,191],"traffic":[110],"but":[114,160],"this":[115,125],"paper":[116],"observes":[117],"limitations":[119],"such":[121,168],"design.":[123],"work,":[126],"we":[127],"present":[128],"fundamentally":[130],"different":[131],"approach":[132],"in":[133],"designing":[134],"caches":[136],"RecSys.":[138],"Our":[139],"ScratchPipe":[141,166],"architecture":[142],"utilizes":[143],"unique":[144],"properties":[145],"develop":[150],"an":[151],"that":[154,172],"not":[155],"only":[156],"sees":[157],"past":[159],"also":[161],"\"future\"":[163],"accesses.":[165],"exploits":[167],"property":[169],"guarantee":[171],"active":[174],"working":[175],"set":[176],"can":[180],"\"always\"":[181],"be":[182,194],"captured":[183],"our":[185],"design,":[188],"enabling":[189],"conducted":[195],"speed.":[199]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
