{"id":"https://openalex.org/W4415003672","doi":"https://doi.org/10.1145/3725783.3764389","title":"Towards Fully Disaggregated Recommendation Model Serving","display_name":"Towards Fully Disaggregated Recommendation Model Serving","publication_year":2025,"publication_date":"2025-10-09","ids":{"openalex":"https://openalex.org/W4415003672","doi":"https://doi.org/10.1145/3725783.3764389"},"language":"en","primary_location":{"id":"doi:10.1145/3725783.3764389","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725783.3764389","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM SIGOPS Asia-Pacific Workshop on Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3725783.3764389","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046351085","display_name":"Yibo Huang","orcid":"https://orcid.org/0000-0002-9215-4298"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yibo Huang","raw_affiliation_strings":["University of Michigan, Ann Arbor, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, Michigan, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101886757","display_name":"Yiming Qiu","orcid":"https://orcid.org/0009-0003-9328-3205"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yiming Qiu","raw_affiliation_strings":["University of Hong Kong, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100785449","display_name":"Zhenning Yang","orcid":"https://orcid.org/0009-0003-0813-5911"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhenning Yang","raw_affiliation_strings":["University of Michigan, Ann Arbor, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, Michigan, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102800694","display_name":"Yi Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Dai","raw_affiliation_strings":["Fudan University, Shanghai, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089698613","display_name":"Dingming Wu","orcid":"https://orcid.org/0000-0001-7566-5793"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dingming Wu","raw_affiliation_strings":["Meta Platforms Inc., Menlo Park, California, USA"],"affiliations":[{"raw_affiliation_string":"Meta Platforms Inc., Menlo Park, California, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101622777","display_name":"Fan Lai","orcid":"https://orcid.org/0009-0005-0472-107X"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fan Lai","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Champaign, Illinois, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Champaign, Illinois, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053453103","display_name":"Jiarong Xing","orcid":"https://orcid.org/0009-0006-6163-0569"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiarong Xing","raw_affiliation_strings":["Rice University, Houston, Texas, USA"],"affiliations":[{"raw_affiliation_string":"Rice University, Houston, Texas, USA","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082276670","display_name":"Ang Chen","orcid":"https://orcid.org/0009-0003-8326-8124"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ang Chen","raw_affiliation_strings":["University of Michigan, Ann Arbor, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, Michigan, USA","institution_ids":["https://openalex.org/I27837315"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5046351085"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":3.2508,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.9399295,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"38","last_page":"45"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.97079998254776,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.6395000219345093},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.6326000094413757},{"id":"https://openalex.org/keywords/remote-direct-memory-access","display_name":"Remote direct memory access","score":0.5587000250816345},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.531499981880188},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4580000042915344},{"id":"https://openalex.org/keywords/shared-resource","display_name":"Shared resource","score":0.450300008058548},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.43220001459121704},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.390500009059906},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3402000069618225}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8209999799728394},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.6395000219345093},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.6326000094413757},{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.5587000250816345},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.531499981880188},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5202999711036682},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.4763000011444092},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4580000042915344},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.450300008058548},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.43220001459121704},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.390500009059906},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3402000069618225},{"id":"https://openalex.org/C2777958785","wikidata":"https://www.wikidata.org/wiki/Q17120940","display_name":"Resource efficiency","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.3208000063896179},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.31040000915527344},{"id":"https://openalex.org/C65813073","wikidata":"https://www.wikidata.org/wiki/Q1622420","display_name":"High availability","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.3025999963283539},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.30239999294281006},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C72108876","wikidata":"https://www.wikidata.org/wiki/Q844565","display_name":"Transaction processing","level":3,"score":0.2759999930858612},{"id":"https://openalex.org/C31352089","wikidata":"https://www.wikidata.org/wiki/Q3750474","display_name":"Systems design","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2676999866962433}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3725783.3764389","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725783.3764389","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM SIGOPS Asia-Pacific Workshop on Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3725783.3764389","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725783.3764389","pdf_url":null,"source":null,"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM SIGOPS Asia-Pacific Workshop on Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2926543120","https://openalex.org/W2949498362","https://openalex.org/W2979719709","https://openalex.org/W2981579759","https://openalex.org/W3016842236","https://openalex.org/W3043433718","https://openalex.org/W3153531173","https://openalex.org/W3166863226","https://openalex.org/W3197720002","https://openalex.org/W3197870239","https://openalex.org/W4214658871","https://openalex.org/W4282006522","https://openalex.org/W4285361428","https://openalex.org/W4311118888","https://openalex.org/W4327930478","https://openalex.org/W4380881110"],"related_works":[],"abstract_inverted_index":{"Serving":[0],"embedding-based":[1],"recommendation":[2],"(EMR)":[3],"models":[4],"requires":[5],"a":[6,23,45,111],"mix":[7],"of":[8,66,86,129],"GPUs,":[9],"CPUs,":[10],"and":[11,34,53,75,105,121],"DRAM.":[12],"Current":[13],"systems":[14],"typically":[15],"provision":[16],"these":[17,51],"resources":[18],"on":[19],"monolithic":[20],"servers":[21],"with":[22],"fixed":[24],"ratio":[25],"across":[26],"resource":[27,32,73],"types,":[28],"leading":[29],"to":[30,88],"inefficient":[31],"utilization":[33],"inflated":[35],"operational":[36,77],"costs.":[37,78],"To":[38],"solve":[39],"this":[40,81],"problem,":[41],"we":[42],"propose":[43],"FlexEMR,":[44],"system":[46],"architecture":[47],"that":[48],"fully":[49,133],"disaggregates":[50],"resources,":[52,67],"interconnects":[54],"them":[55],"via":[56],"an":[57],"optimized":[58],"RDMA":[59,114],"network.":[60],"This":[61],"design":[62,119],"enables":[63],"independent":[64],"scaling":[65],"enhances":[68],"failure":[69],"isolation,":[70],"improves":[71],"overall":[72],"efficiency,":[74],"reduces":[76],"We":[79,116],"achieve":[80],"by":[82,94,101],"introducing":[83],"two":[84],"classes":[85],"techniques":[87],"address":[89],"the":[90,127],"networking":[91],"challenges":[92],"introduced":[93],"disaggregation:":[95],"(1)":[96],"optimizing":[97],"embedding":[98],"lookup":[99],"communication":[100],"leveraging":[102],"workload":[103],"locality,":[104],"(2)":[106],"improving":[107],"network":[108],"transport":[109],"through":[110],"high-performance,":[112],"multithreaded":[113],"engine.":[115],"detail":[117],"our":[118],"considerations":[120],"share":[122],"early":[123],"performance":[124],"insights,":[125],"highlighting":[126],"potential":[128],"FlexEMR":[130],"for":[131],"enabling":[132],"disaggregated":[134],"EMR":[135],"model":[136],"serving.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
