{"id":"https://openalex.org/W4409248575","doi":"https://doi.org/10.1109/hpca61900.2025.00121","title":"Machine Learning-Guided Memory Optimization for DLRM Inference on Tiered Memory","display_name":"Machine Learning-Guided Memory Optimization for DLRM Inference on Tiered Memory","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248575","doi":"https://doi.org/10.1109/hpca61900.2025.00121"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056482728","display_name":"Jie Ren","orcid":"https://orcid.org/0000-0001-5541-433X"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jie Ren","raw_affiliation_strings":["William &#x0026; Mary"],"affiliations":[{"raw_affiliation_string":"William &#x0026; Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100595427","display_name":"Bin Ma","orcid":"https://orcid.org/0000-0002-3724-4581"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bin Ma","raw_affiliation_strings":["William &#x0026; Mary"],"affiliations":[{"raw_affiliation_string":"William &#x0026; Mary","institution_ids":["https://openalex.org/I16285277"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083107581","display_name":"Shuangyan Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I156087764","display_name":"University of California, Merced","ror":"https://ror.org/00d9ah105","country_code":"US","type":"education","lineage":["https://openalex.org/I156087764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shuangyan Yang","raw_affiliation_strings":["University of California,Merced"],"affiliations":[{"raw_affiliation_string":"University of California,Merced","institution_ids":["https://openalex.org/I156087764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048076765","display_name":"Benjamin Francis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benjamin Francis","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112622151","display_name":"Ehsan K. Ardestani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ehsan K. Ardestani","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109535394","display_name":"Min Si","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Si","raw_affiliation_strings":["Meta"],"affiliations":[{"raw_affiliation_string":"Meta","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101751775","display_name":"Li Dong","orcid":"https://orcid.org/0000-0003-3083-7170"},"institutions":[{"id":"https://openalex.org/I16285277","display_name":"William & Mary","ror":"https://ror.org/03hsf0573","country_code":"US","type":"education","lineage":["https://openalex.org/I16285277"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dong Li","raw_affiliation_strings":["William &#x0026; Mary"],"affiliations":[{"raw_affiliation_string":"William &#x0026; Mary","institution_ids":["https://openalex.org/I16285277"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5056482728"],"corresponding_institution_ids":["https://openalex.org/I16285277"],"apc_list":null,"apc_paid":null,"fwci":12.0603,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98129817,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1631","last_page":"1647"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.2669999897480011,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.2669999897480011,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7691915035247803},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6356633901596069},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5238856673240662},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4835941195487976},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.43881314992904663},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.2516951262950897},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07208263874053955}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7691915035247803},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6356633901596069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5238856673240662},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4835941195487976},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.43881314992904663},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.2516951262950897},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07208263874053955}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00121","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":83,"referenced_works":["https://openalex.org/W1987225815","https://openalex.org/W2051407019","https://openalex.org/W2125977901","https://openalex.org/W2168923722","https://openalex.org/W2329976284","https://openalex.org/W2512971201","https://openalex.org/W2611027062","https://openalex.org/W2613977835","https://openalex.org/W2615718892","https://openalex.org/W2632775315","https://openalex.org/W2756531049","https://openalex.org/W2795232275","https://openalex.org/W2903504144","https://openalex.org/W2905519370","https://openalex.org/W2930718998","https://openalex.org/W2947737663","https://openalex.org/W2949383392","https://openalex.org/W2949727479","https://openalex.org/W2979729542","https://openalex.org/W2988190042","https://openalex.org/W3012303953","https://openalex.org/W3043023836","https://openalex.org/W3111158081","https://openalex.org/W3131379896","https://openalex.org/W3152508978","https://openalex.org/W3153963463","https://openalex.org/W3155243801","https://openalex.org/W3157864729","https://openalex.org/W3169244019","https://openalex.org/W3169936356","https://openalex.org/W3170642333","https://openalex.org/W3170905359","https://openalex.org/W3177828909","https://openalex.org/W3201621211","https://openalex.org/W4214658871","https://openalex.org/W4229042463","https://openalex.org/W4239937081","https://openalex.org/W4241057782","https://openalex.org/W4253438286","https://openalex.org/W4280630845","https://openalex.org/W4285815122","https://openalex.org/W4308083579","https://openalex.org/W4312996980","https://openalex.org/W4318541545","https://openalex.org/W4321446282","https://openalex.org/W4321636683","https://openalex.org/W4380881110","https://openalex.org/W4388109512","https://openalex.org/W4389476179","https://openalex.org/W4391623937","https://openalex.org/W4393407138","https://openalex.org/W4394923336","https://openalex.org/W4394998875","https://openalex.org/W4399830188","https://openalex.org/W4401212102","https://openalex.org/W4405755164","https://openalex.org/W4409248382","https://openalex.org/W6635797942","https://openalex.org/W6641617223","https://openalex.org/W6736893582","https://openalex.org/W6739901393","https://openalex.org/W6749259350","https://openalex.org/W6749858812","https://openalex.org/W6755207826","https://openalex.org/W6756363772","https://openalex.org/W6760999108","https://openalex.org/W6763737044","https://openalex.org/W6768648331","https://openalex.org/W6774028308","https://openalex.org/W6774806506","https://openalex.org/W6778883912","https://openalex.org/W6779109089","https://openalex.org/W6784249077","https://openalex.org/W6786109342","https://openalex.org/W6787953186","https://openalex.org/W6789141709","https://openalex.org/W6791144649","https://openalex.org/W6794474486","https://openalex.org/W6841945416","https://openalex.org/W6846450229","https://openalex.org/W6849286969","https://openalex.org/W6882762124","https://openalex.org/W7019128387"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Deep":[0],"learning":[1,42],"recommendation":[2],"models":[3,101],"(DLRMs)":[4],"are":[5],"widely":[6],"used":[7],"in":[8,29,76,86],"industry,":[9],"and":[10,48,89,104,118,127,139],"their":[11],"memory":[12,20],"capacity":[13],"requirements":[14],"reach":[15],"the":[16,77,91,114],"terabyte":[17],"scale.":[18],"Tiered":[19],"architectures":[21],"provide":[22],"a":[23,40,107],"cost-effective":[24],"solution":[25],"but":[26],"introduce":[27],"challenges":[28,85],"embedding-vector":[30,95],"placement":[31],"due":[32],"to":[33,57,123,157],"complex":[34],"embedding-access":[35],"patterns.":[36],"We":[37],"propose":[38],"RecMG,":[39],"machine":[41],"(ML)-guided":[43],"system":[44],"for":[45,94,102],"vector":[46],"caching":[47,103],"prefetching":[49,115],"on":[50,72],"tiered":[51],"memory.":[52],"RecMG":[53,70,112,130,148],"accurately":[54],"predicts":[55],"accesses":[56],"embedding":[58],"vectors":[59],"with":[60],"long":[61],"reuse":[62],"distances":[63],"or":[64],"few":[65],"reuses.":[66],"The":[67],"design":[68],"of":[69,79],"focuses":[71],"making":[73],"ML":[74,100],"feasible":[75],"context":[78],"DLRM":[80,145,152],"inference":[81,146,153],"by":[82,134,155],"addressing":[83],"unique":[84],"data":[87],"labeling":[88],"navigating":[90],"search":[92,116],"space":[93,117],"placement.":[96],"By":[97],"employing":[98],"separate":[99],"prefetching,":[105],"plus":[106],"novel":[108],"differentiable":[109],"loss":[110],"function,":[111],"narrows":[113],"minimizes":[119],"on-demand":[120,132],"fetches.":[121],"Compared":[122],"state-of-the-art":[124],"temporal,":[125],"spatial,":[126],"ML-based":[128],"prefetchers,":[129],"reduces":[131,150],"fetches":[133],"$2.2":[135],"\\times,":[136],"2.8":[137],"\\times$,":[138,141],"$1.5":[140],"respectively.":[142],"In":[143],"industrial-scale":[144],"scenarios,":[147],"effectively":[149],"end-to-end":[151],"time":[154],"up":[156],"43%.":[158]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
