{"id":"https://openalex.org/W4385312506","doi":"https://doi.org/10.1109/iwqos57198.2023.10188785","title":"Understanding the Benefits of Hardware-Accelerated Communication in Model-Serving Applications","display_name":"Understanding the Benefits of Hardware-Accelerated Communication in Model-Serving Applications","publication_year":2023,"publication_date":"2023-06-19","ids":{"openalex":"https://openalex.org/W4385312506","doi":"https://doi.org/10.1109/iwqos57198.2023.10188785"},"language":"en","primary_location":{"id":"doi:10.1109/iwqos57198.2023.10188785","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwqos57198.2023.10188785","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/ACM 31st International Symposium on Quality of Service (IWQoS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090531845","display_name":"Walid A. Hanafy","orcid":"https://orcid.org/0000-0001-5765-8194"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Walid A. Hanafy","raw_affiliation_strings":["University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100436496","display_name":"Limin Wang","orcid":"https://orcid.org/0000-0002-0576-3950"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Limin Wang","raw_affiliation_strings":["Nokia Bell Labs"],"affiliations":[{"raw_affiliation_string":"Nokia Bell Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025679986","display_name":"Hyunseok Chang","orcid":"https://orcid.org/0000-0002-2840-1143"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyunseok Chang","raw_affiliation_strings":["Nokia Bell Labs"],"affiliations":[{"raw_affiliation_string":"Nokia Bell Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008448513","display_name":"Sarit Mukherjee","orcid":"https://orcid.org/0000-0002-5861-940X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sarit Mukherjee","raw_affiliation_strings":["Nokia Bell Labs"],"affiliations":[{"raw_affiliation_string":"Nokia Bell Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079816803","display_name":"T. V. Lakshman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"T. V. Lakshman","raw_affiliation_strings":["Nokia Bell Labs"],"affiliations":[{"raw_affiliation_string":"Nokia Bell Labs","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032939724","display_name":"Prashant Shenoy","orcid":"https://orcid.org/0000-0002-5435-1901"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prashant Shenoy","raw_affiliation_strings":["University of Massachusetts Amherst"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5090531845"],"corresponding_institution_ids":["https://openalex.org/I24603500"],"apc_list":null,"apc_paid":null,"fwci":1.2037,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7989105,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/remote-direct-memory-access","display_name":"Remote direct memory access","score":0.9028640985488892},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8412444591522217},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.6588460803031921},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5990662574768066},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5403892993927002},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.5354630351066589},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5220329761505127},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.4988882541656494},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.49352967739105225},{"id":"https://openalex.org/keywords/context-switch","display_name":"Context switch","score":0.49178364872932434},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.49028661847114563},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4426720440387726},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.42521268129348755},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.37058281898498535},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.33235251903533936},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.31726449728012085},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2642027735710144},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10571560263633728}],"concepts":[{"id":"https://openalex.org/C130795937","wikidata":"https://www.wikidata.org/wiki/Q2561570","display_name":"Remote direct memory access","level":2,"score":0.9028640985488892},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8412444591522217},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.6588460803031921},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5990662574768066},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5403892993927002},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.5354630351066589},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5220329761505127},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.4988882541656494},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.49352967739105225},{"id":"https://openalex.org/C53833338","wikidata":"https://www.wikidata.org/wiki/Q1061424","display_name":"Context switch","level":2,"score":0.49178364872932434},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.49028661847114563},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4426720440387726},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.42521268129348755},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.37058281898498535},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.33235251903533936},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.31726449728012085},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2642027735710144},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10571560263633728},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwqos57198.2023.10188785","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwqos57198.2023.10188785","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/ACM 31st International Symposium on Quality of Service (IWQoS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6399999856948853}],"awards":[{"id":"https://openalex.org/G8626675799","display_name":null,"funder_award_id":"2105494,1908536,2211302,2211888","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1577304665","https://openalex.org/W1861492603","https://openalex.org/W2071634873","https://openalex.org/W2110946051","https://openalex.org/W2114623221","https://openalex.org/W2117539524","https://openalex.org/W2123893563","https://openalex.org/W2135099885","https://openalex.org/W2786171709","https://openalex.org/W2809735256","https://openalex.org/W2889463335","https://openalex.org/W2926767350","https://openalex.org/W2945357343","https://openalex.org/W2964108773","https://openalex.org/W3012088208","https://openalex.org/W3097904259","https://openalex.org/W3101708369","https://openalex.org/W3130823781","https://openalex.org/W3132412959","https://openalex.org/W3156865694","https://openalex.org/W3216099748","https://openalex.org/W4214603110","https://openalex.org/W4297808460","https://openalex.org/W4317935380","https://openalex.org/W6730956707","https://openalex.org/W6747759466","https://openalex.org/W6752636418","https://openalex.org/W6762488536","https://openalex.org/W6779103662"],"related_works":["https://openalex.org/W3211931762","https://openalex.org/W2942586735","https://openalex.org/W4225757241","https://openalex.org/W2534668683","https://openalex.org/W3162654428","https://openalex.org/W4375928818","https://openalex.org/W4293812074","https://openalex.org/W3013363822","https://openalex.org/W4287076991","https://openalex.org/W3184768109"],"abstract_inverted_index":{"It":[0],"is":[1,12,98],"commonly":[2],"assumed":[3],"that":[4,16,58,173,201],"the":[5,18,38,79,105,112,118,132,146,158,179],"end-to-end":[6],"networking":[7,33],"performance":[8,134,183],"of":[9,17,41,104,108,114,120,129,161,207],"edge":[10,25,75],"offloading":[11],"purely":[13],"dictated":[14],"by":[15,137],"network":[19,70,95],"connectivity":[20],"between":[21],"end":[22],"devices":[23],"and":[24,44,64,87,117,127,156,189],"computing":[26,76],"facilities,":[27],"where":[28],"ongoing":[29],"innovation":[30],"in":[31,111,149,185],"5G/6G":[32],"can":[34,194,204],"help.":[35],"However,":[36],"with":[37],"growing":[39],"complexity":[40],"edge-offloaded":[42],"computation":[43,115,130,154],"dynamic":[45],"load":[46],"balancing":[47],"requirements,":[48],"an":[49],"offloaded":[50],"task":[51],"often":[52],"goes":[53],"through":[54],"a":[55,68,73,99,170],"multi-stage":[56],"pipeline":[57],"spans":[59],"across":[60],"multiple":[61],"compute":[62],"nodes":[63],"proxies":[65],"interconnected":[66],"via":[67],"dedicated":[69],"fabric":[71],"within":[72],"given":[74],"facility.":[77],"As":[78],"latest":[80],"hardware-accelerated":[81,163,192],"transport":[82],"technologies":[83,110],"such":[84,94,123],"as":[85,124],"RDMA":[86,89,203],"GPUDirect":[88,202],"are":[90],"adopted":[91],"to":[92,212],"build":[93,169],"fabric,":[96],"there":[97],"need":[100],"for":[101],"good":[102],"understanding":[103],"full":[106],"potential":[107,159],"these":[109,138],"context":[113],"offload":[116],"effect":[119],"different":[121],"factors":[122],"GPU":[125],"scheduling":[126],"characteristics":[128],"on":[131],"net":[133],"gain":[135],"achievable":[136],"technologies.":[139],"This":[140],"paper":[141],"unveils":[142],"detailed":[143],"insights":[144],"into":[145],"latency":[147],"overhead":[148],"typical":[150],"machine":[151],"learning":[152],"(ML)-based":[153],"pipelines":[155,188],"analyzes":[157],"benefits":[160],"adopting":[162],"communication.":[164],"To":[165],"this":[166],"end,":[167],"we":[168,181,199],"model-serving":[171,187,208],"framework":[172],"supports":[174],"various":[175],"communication":[176,193],"mechanisms.":[177],"Using":[178],"framework,":[180],"identify":[182],"bottlenecks":[184],"state-of-the-art":[186],"show":[190,200],"how":[191],"alleviate":[195],"them.":[196],"For":[197],"example,":[198],"save":[205],"15-50%":[206],"latency,":[209],"which":[210],"amounts":[211],"70\u2013160":[213],"ms.":[214]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-04T23:47:47.292601","created_date":"2025-10-10T00:00:00"}
