{"id":"https://openalex.org/W4412610616","doi":"https://doi.org/10.1109/ipdps64566.2025.00070","title":"Reducing the End-to-End Latency of DNN-Based Recommendation Systems in GPU Pools","display_name":"Reducing the End-to-End Latency of DNN-Based Recommendation Systems in GPU Pools","publication_year":2025,"publication_date":"2025-06-03","ids":{"openalex":"https://openalex.org/W4412610616","doi":"https://doi.org/10.1109/ipdps64566.2025.00070"},"language":"en","primary_location":{"id":"doi:10.1109/ipdps64566.2025.00070","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps64566.2025.00070","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002587824","display_name":"Guangqiang Luan","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangqiang Luan","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083883541","display_name":"Pu Pang","orcid":"https://orcid.org/0009-0004-3685-0901"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pu Pang","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418509","display_name":"Chen Chen","orcid":"https://orcid.org/0000-0002-9212-9308"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Chen","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101732756","display_name":"Guoyao Xu","orcid":"https://orcid.org/0000-0002-1136-2678"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guoyao Xu","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100458200","display_name":"Chi Zhang","orcid":"https://orcid.org/0000-0002-7511-805X"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chi Zhang","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119057121","display_name":"Yanyi Zi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yanyi Zi","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023686830","display_name":"Yinghao Yu","orcid":"https://orcid.org/0000-0002-2744-845X"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yinghao Yu","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073989304","display_name":"Guodong Yang","orcid":"https://orcid.org/0000-0003-1908-071X"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guodong Yang","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100426755","display_name":"Liping Zhang","orcid":"https://orcid.org/0000-0002-6884-3229"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liping Zhang","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039318240","display_name":"Minyi Guo","orcid":"https://orcid.org/0000-0003-0034-2302"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Shanghai Jiao Tong University,Department of Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,Department of Computer Science and Engineering","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5002587824"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30077592,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"725","last_page":"736"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9314000010490417,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9182000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7799472212791443},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6235915422439575},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5938096642494202},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4604312777519226},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.42023056745529175},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3687392771244049},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.32714641094207764},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19064953923225403},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.18895134329795837},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10425469279289246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7799472212791443},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6235915422439575},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5938096642494202},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4604312777519226},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.42023056745529175},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3687392771244049},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.32714641094207764},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19064953923225403},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.18895134329795837},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10425469279289246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdps64566.2025.00070","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdps64566.2025.00070","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1973730821","https://openalex.org/W1997049009","https://openalex.org/W2064823719","https://openalex.org/W2136189984","https://openalex.org/W2162884889","https://openalex.org/W2517617279","https://openalex.org/W2606722458","https://openalex.org/W2920031528","https://openalex.org/W2926543120","https://openalex.org/W2962745591","https://openalex.org/W2962989965","https://openalex.org/W2983438380","https://openalex.org/W2984200518","https://openalex.org/W2994850640","https://openalex.org/W3016842236","https://openalex.org/W3043433718","https://openalex.org/W3158146252","https://openalex.org/W3176770761","https://openalex.org/W3202572187","https://openalex.org/W3205539956","https://openalex.org/W3210776666","https://openalex.org/W4214658871","https://openalex.org/W4220884018","https://openalex.org/W4220924023","https://openalex.org/W4242102164","https://openalex.org/W4280642915","https://openalex.org/W4285504009","https://openalex.org/W4308426211","https://openalex.org/W4380881110","https://openalex.org/W4387302766","https://openalex.org/W6716975455","https://openalex.org/W6751970145","https://openalex.org/W6764873897","https://openalex.org/W6775327868","https://openalex.org/W6781287053","https://openalex.org/W6781728138","https://openalex.org/W6784871562","https://openalex.org/W6784879251","https://openalex.org/W6797162769","https://openalex.org/W6847189266"],"related_works":["https://openalex.org/W2005148983","https://openalex.org/W2012954338","https://openalex.org/W2096672917","https://openalex.org/W2392023973","https://openalex.org/W3205411230","https://openalex.org/W4286899009","https://openalex.org/W9168048","https://openalex.org/W4300849822","https://openalex.org/W4376480820","https://openalex.org/W3155891479"],"abstract_inverted_index":{"While":[0],"intelligent":[1],"applications":[2],"(e.g.,":[3],"recommendation":[4,31,187],"systems)":[5],"prefer":[6],"different":[7],"CPU-GPU":[8],"ratios,":[9],"GPU":[10,16,43,78,90],"pooling":[11,79],"technique":[12],"that":[13,111],"decouples":[14],"the":[15,35,41,55,64,73,85,89,133,137,153,183],"and":[17,136],"CPU":[18],"resources":[19],"yields":[20],"substantial":[21],"flexibility":[22],"when":[23],"serving":[24],"diverse":[25],"applications.":[26],"With":[27],"such":[28,50],"architecture,":[29],"DNN-based":[30],"services":[32],"often":[33],"offload":[34],"compute-intensive":[36],"neural":[37],"network":[38],"layers":[39],"to":[40,60,88,146,167,191],"remote":[42],"pool":[44,91],"for":[45,69],"high":[46],"resource":[47],"utilization.":[48],"However,":[49],"a":[51,108,113,120],"paradigm":[52],"results":[53,174],"in":[54,76],"long":[56],"end-to-end":[57],"latency":[58,184],"due":[59],"two":[61],"causes:":[62],"1)":[63],"intermediate":[65],"data":[66,150,163],"is":[67],"copied":[68],"multiple":[70,93],"times":[71],"during":[72],"entire":[74],"process":[75],"current":[77],"practices,":[80],"incurring":[81],"heavy":[82],"overheads;":[83],"2)":[84],"content":[86],"transferred":[87],"involves":[92],"small":[94,159],"tensors,":[95],"suffering":[96],"from":[97],"poor":[98],"bandwidth":[99],"efficiency.":[100,172],"To":[101],"solve":[102],"these":[103],"problems,":[104],"we":[105],"design":[106],"Zero,":[107],"runtime":[109],"system":[110],"incorporates":[112],"zero-copy":[114,126],"transmission":[115,127],"mechanism":[116,128],"as":[117,119,166],"well":[118],"dynamic":[121],"tensor":[122,154],"merging":[123,155],"policy.":[124],"The":[125],"unifies":[129],"memory":[130],"management":[131],"across":[132],"inference":[134],"framework":[135],"RPC":[138],"framework,":[139],"accompanied":[140],"by":[141,189],"an":[142],"elaborated":[143],"serialization":[144],"protocol":[145],"fully":[147],"eliminate":[148],"redundant":[149],"copying.":[151],"Meanwhile,":[152],"policy":[156],"deliberately":[157],"organizes":[158],"tensors":[160],"into":[161],"larger":[162],"blocks,":[164],"so":[165],"transfer":[168],"them":[169],"with":[170,178],"higher":[171],"Experimental":[173],"show":[175],"that,":[176],"compared":[177],"prior":[179],"work,":[180],"Zero":[181],"reduces":[182],"of":[185],"typical":[186],"models":[188],"up":[190],"15.1%":[192],"(10.1%":[193],"on":[194],"average).":[195]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
