{"id":"https://openalex.org/W4386568791","doi":"https://doi.org/10.1145/3609384","title":"<scp>EMS-i</scp> : An Efficient Memory System Design with Specialized Caching Mechanism for Recommendation Inference","display_name":"<scp>EMS-i</scp> : An Efficient Memory System Design with Specialized Caching Mechanism for Recommendation Inference","publication_year":2023,"publication_date":"2023-09-09","ids":{"openalex":"https://openalex.org/W4386568791","doi":"https://doi.org/10.1145/3609384"},"language":"en","primary_location":{"id":"doi:10.1145/3609384","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3609384","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053039089","display_name":"Yitu Wang","orcid":"https://orcid.org/0000-0002-0129-7294"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yitu Wang","raw_affiliation_strings":["Duke University, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413190","display_name":"Shiyu Li","orcid":"https://orcid.org/0000-0002-1990-7150"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shiyu Li","raw_affiliation_strings":["Duke University, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032094524","display_name":"Qilin Zheng","orcid":"https://orcid.org/0000-0002-5593-1369"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qilin Zheng","raw_affiliation_strings":["Duke University, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082100677","display_name":"Andrew Chang","orcid":"https://orcid.org/0009-0006-1573-1377"},"institutions":[{"id":"https://openalex.org/I4210101778","display_name":"Samsung (United States)","ror":"https://ror.org/01bfbvm65","country_code":"US","type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4210101778"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Chang","raw_affiliation_strings":["Samsung Semiconductor, Inc., USA"],"affiliations":[{"raw_affiliation_string":"Samsung Semiconductor, Inc., USA","institution_ids":["https://openalex.org/I4210101778"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai Li","raw_affiliation_strings":["Duke University, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058073627","display_name":"Yiran Chen","orcid":"https://orcid.org/0000-0002-1486-8412"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yiran Chen","raw_affiliation_strings":["Duke University, USA"],"affiliations":[{"raw_affiliation_string":"Duke University, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5053039089"],"corresponding_institution_ids":["https://openalex.org/I170897317"],"apc_list":null,"apc_paid":null,"fwci":1.4064,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.82562885,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"22","issue":"5s","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9125887155532837},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.656983494758606},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6564539670944214},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5524258017539978},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5329680442810059},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.4797593057155609},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.462594211101532},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.43725061416625977},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4252403974533081},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.41737794876098633},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3554800748825073},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.35494863986968994},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.09042713046073914}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9125887155532837},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.656983494758606},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6564539670944214},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5524258017539978},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5329680442810059},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.4797593057155609},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.462594211101532},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.43725061416625977},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4252403974533081},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.41737794876098633},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3554800748825073},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.35494863986968994},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.09042713046073914},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3609384","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3609384","pdf_url":null,"source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8899999856948853,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2034861439","https://openalex.org/W2167032986","https://openalex.org/W2250539671","https://openalex.org/W2427881153","https://openalex.org/W2614421408","https://openalex.org/W2750384547","https://openalex.org/W2947737663","https://openalex.org/W2963469388","https://openalex.org/W2979719709","https://openalex.org/W3156636320","https://openalex.org/W3158702178","https://openalex.org/W3191222816","https://openalex.org/W3201621211","https://openalex.org/W4200404159","https://openalex.org/W4214821564","https://openalex.org/W4221149098","https://openalex.org/W4233429846","https://openalex.org/W4234552385","https://openalex.org/W4245659846","https://openalex.org/W6659004162","https://openalex.org/W6779824479"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W2027972911","https://openalex.org/W2157978810","https://openalex.org/W2078036665","https://openalex.org/W778337757"],"abstract_inverted_index":{"Recommendation":[0],"systems":[1],"have":[2],"been":[3],"widely":[4],"embedded":[5],"into":[6,147],"many":[7],"Internet":[8],"services.":[9],"For":[10],"example,":[11],"Meta\u2019s":[12],"deep":[13],"learning":[14],"recommendation":[15,157],"model":[16],"(DLRM)":[17],"shows":[18],"high":[19,111],"prefictive":[20],"accuracy":[21],"of":[22,34,41,113,169,214],"click-through":[23],"rate":[24],"in":[25,205],"processing":[26,61],"large-scale":[27],"embedding":[28,52],"tables.":[29],"The":[30],"SparseLengthSum":[31],"(SLS)":[32],"kernel":[33,191],"the":[35,38,42,51,86,98,102,110,115,119,122,125,129,148,162,167,182,189,202,208,218,231],"DLRM":[36,43,171],"dominates":[37],"inference":[39,190],"time":[40],"due":[44],"to":[45,50,64,69,83,96,166,179,217,225,234,244],"intensive":[46],"irregular":[47],"memory":[48,67,75,138,149,249],"accesses":[49],"vectors.":[53],"Some":[54],"prior":[55],"works":[56],"directly":[57],"adopt":[58],"near":[59],"data":[60,87,209],"(NDP)":[62],"solutions":[63],"obtain":[65],"higher":[66],"bandwidth":[68],"accelerate":[70],"SLS.":[71],"However,":[72],"their":[73],"inferior":[74],"hierarchy":[76,150],"induces":[77],"low":[78],"performance-cost":[79],"ratio":[80],"and":[81,118,124,173,192,207,230,246,253],"fails":[82],"fully":[84],"exploit":[85],"locality.":[88],"Although":[89],"some":[90],"software-managed":[91],"cache":[92,99,104],"policies":[93],"were":[94],"proposed":[95],"improve":[97,181],"hit":[100],"rate,":[101],"incurred":[103],"miss":[105],"penalty":[106],"is":[107],"unacceptable":[108],"considering":[109,201],"overheads":[112],"executing":[114],"corresponding":[116],"programs":[117],"communication":[120],"between":[121],"host":[123],"accelerator.":[126],"To":[127],"address":[128],"issues":[130],"aforementioned,":[131],"we":[132,186],"propose":[133,174],"EMS-i":[134,222,240],",":[135],"an":[136],"efficient":[137],"system":[139,158],"design":[140,188],"that":[141],"integrates":[142],"Solide":[143],"State":[144],"Drive":[145],"(SSD)":[146],"using":[151],"Compute":[152],"Express":[153],"Link":[154],"(CXL)":[155],"for":[156,198],"inference.":[159],"We":[160],"specialize":[161],"caching":[163],"mechanism":[164,178],"according":[165],"characteristics":[168],"various":[170],"workloads":[172],"a":[175,194,212],"novel":[176],"prefetching":[177],"further":[180],"performance.":[183],"In":[184],"addition,":[185],"delicately":[187],"develop":[193],"customized":[195],"mapping":[196],"scheme":[197],"SLS":[199,206],"operation,":[200],"multi-level":[203],"parallelism":[204],"locality":[210],"within":[211],"batch":[213],"queries.":[215],"Compared":[216],"state-of-the-art":[219],"NDP":[220],"solutions,":[221],"achieves":[223],"up":[224,243],"10.9\u00d7":[226],"speedup":[227],"over":[228],"RecSSD":[229,252],"performance":[232],"comparable":[233],"RecNMP":[235],"with":[236],"72%":[237],"energy":[238],"savings.":[239],"also":[241],"saves":[242],"8.7\u00d7":[245],"6.6":[247],"\u00d7":[248],"cost":[250],"w.r.t.":[251],"RecNMP,":[254],"respectively.":[255]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
