{"id":"https://openalex.org/W4389459125","doi":"https://doi.org/10.1109/tpds.2023.3340518","title":"Graft: Efficient Inference Serving for Hybrid Deep Learning With SLO Guarantees via DNN Re-Alignment","display_name":"Graft: Efficient Inference Serving for Hybrid Deep Learning With SLO Guarantees via DNN Re-Alignment","publication_year":2023,"publication_date":"2023-12-07","ids":{"openalex":"https://openalex.org/W4389459125","doi":"https://doi.org/10.1109/tpds.2023.3340518"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2023.3340518","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3340518","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067494599","display_name":"Jing Wu","orcid":"https://orcid.org/0000-0003-2555-0220"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Wu","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, the Services Computing Technology and System Lab, Cluster and Grid Computing Lab in the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-2555-0220","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, the Services Computing Technology and System Lab, Cluster and Grid Computing Lab in the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100403099","display_name":"Lin Wang","orcid":"https://orcid.org/0000-0001-7181-6128"},"institutions":[{"id":"https://openalex.org/I206945453","display_name":"Paderborn University","ror":"https://ror.org/058kzsd48","country_code":"DE","type":"education","lineage":["https://openalex.org/I206945453"]},{"id":"https://openalex.org/I31512782","display_name":"Technische Universit\u00e4t Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lin Wang","raw_affiliation_strings":["Paderborn University, Paderborn, Germany","Technical University of Darmstadt, Darmstadt, Germany"],"raw_orcid":"https://orcid.org/0000-0001-7181-6128","affiliations":[{"raw_affiliation_string":"Paderborn University, Paderborn, Germany","institution_ids":["https://openalex.org/I206945453"]},{"raw_affiliation_string":"Technical University of Darmstadt, Darmstadt, Germany","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101465329","display_name":"Qirui Jin","orcid":"https://orcid.org/0009-0002-0698-0903"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qirui Jin","raw_affiliation_strings":["National Engineering Research Center for Big Data Technology and System, the Services Computing Technology and System Lab, Cluster and Grid Computing Lab in the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0002-0698-0903","affiliations":[{"raw_affiliation_string":"National Engineering Research Center for Big Data Technology and System, the Services Computing Technology and System Lab, Cluster and Grid Computing Lab in the School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048398271","display_name":"Fangming Liu","orcid":"https://orcid.org/0000-0002-8570-1345"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangming Liu","raw_affiliation_strings":["Peng Cheng Laboratory, Shenzhen, China","Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-8570-1345","affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.0811,"has_fulltext":false,"cited_by_count":27,"citation_normalized_percentile":{"value":0.95861997,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"35","issue":"2","first_page":"280","last_page":"296"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8909430503845215},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.817573606967926},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.7262309193611145},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.6184073686599731},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.6033502221107483},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5600621104240417},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5448960065841675},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.52183598279953},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.513468325138092},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44334763288497925},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1498124599456787}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8909430503845215},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.817573606967926},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.7262309193611145},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.6184073686599731},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6033502221107483},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5600621104240417},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5448960065841675},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.52183598279953},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.513468325138092},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44334763288497925},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1498124599456787},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2023.3340518","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2023.3340518","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[{"id":"https://openalex.org/G8793038005","display_name":"SFB 1053: MAKI  MultiMechanismenAdaption f\u00fcr das k\u00fcnftige Internet","funder_award_id":"210487104","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":88,"referenced_works":["https://openalex.org/W2006023152","https://openalex.org/W2022490362","https://openalex.org/W2568772110","https://openalex.org/W2581065617","https://openalex.org/W2604514113","https://openalex.org/W2605178034","https://openalex.org/W2605258629","https://openalex.org/W2630837129","https://openalex.org/W2736062019","https://openalex.org/W2789383786","https://openalex.org/W2798170643","https://openalex.org/W2892952080","https://openalex.org/W2903278032","https://openalex.org/W2903650079","https://openalex.org/W2918317383","https://openalex.org/W2920031528","https://openalex.org/W2928897890","https://openalex.org/W2931743911","https://openalex.org/W2971447174","https://openalex.org/W2972087877","https://openalex.org/W2972871493","https://openalex.org/W2980856918","https://openalex.org/W2981114133","https://openalex.org/W2982157693","https://openalex.org/W2987053069","https://openalex.org/W2989368243","https://openalex.org/W2996942942","https://openalex.org/W2998506323","https://openalex.org/W2998600867","https://openalex.org/W3011434423","https://openalex.org/W3017194848","https://openalex.org/W3034270904","https://openalex.org/W3037681773","https://openalex.org/W3049640275","https://openalex.org/W3088405768","https://openalex.org/W3093987685","https://openalex.org/W3095488153","https://openalex.org/W3097411828","https://openalex.org/W3102767875","https://openalex.org/W3104263540","https://openalex.org/W3105381414","https://openalex.org/W3144271226","https://openalex.org/W3153345798","https://openalex.org/W3156189202","https://openalex.org/W3157943880","https://openalex.org/W3172857592","https://openalex.org/W3176483701","https://openalex.org/W3186281912","https://openalex.org/W3191321386","https://openalex.org/W3206636350","https://openalex.org/W3208089751","https://openalex.org/W3208113647","https://openalex.org/W3210053704","https://openalex.org/W3210617645","https://openalex.org/W3210705689","https://openalex.org/W3210764291","https://openalex.org/W3211149853","https://openalex.org/W3212924994","https://openalex.org/W4214951654","https://openalex.org/W4246193833","https://openalex.org/W4282959697","https://openalex.org/W4282970339","https://openalex.org/W4283032932","https://openalex.org/W4283210602","https://openalex.org/W4289821779","https://openalex.org/W4308399059","https://openalex.org/W4312060051","https://openalex.org/W4313229743","https://openalex.org/W4321636552","https://openalex.org/W4367047170","https://openalex.org/W4372272185","https://openalex.org/W4388041447","https://openalex.org/W6637373629","https://openalex.org/W6684191040","https://openalex.org/W6730956707","https://openalex.org/W6739696289","https://openalex.org/W6753751555","https://openalex.org/W6758474236","https://openalex.org/W6765599866","https://openalex.org/W6767064347","https://openalex.org/W6772284903","https://openalex.org/W6778330239","https://openalex.org/W6778371292","https://openalex.org/W6779103662","https://openalex.org/W6798686915","https://openalex.org/W6799023841","https://openalex.org/W6803631858","https://openalex.org/W7034064029"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4312814274","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W2358353312","https://openalex.org/W2353836703"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"(DNNs)":[3],"have":[4],"been":[5],"widely":[6,172],"adopted":[7],"for":[8,81,138],"various":[9],"mobile":[10,24,40,55],"inference":[11,48,78,156,195],"tasks,":[12],"yet":[13],"their":[14,20],"ever-increasing":[15],"computational":[16],"demands":[17],"are":[18,65],"hindering":[19],"deployment":[21],"on":[22,70,132],"resource-constrained":[23],"devices.":[25,56],"Hybrid":[26],"deep":[27,83],"learning":[28,84],"partitions":[29],"a":[30,43,100,161],"DNN":[31,62,104,109,123,143],"into":[32],"two":[33],"parts":[34],"and":[35,42,141,164,175],"deploys":[36],"them":[37],"across":[38],"the":[39,71,97,120,155,193],"device":[41],"server,":[44],"aiming":[45],"to":[46,67,95,111,114,145,189],"reduce":[47],"latency":[49,86,157],"or":[50],"prolong":[51],"battery":[52],"life":[53],"of":[54,122,171],"However,":[57],"such":[58],"partitioning":[59],"produces":[60],"(non-uniform)":[61],"fragments":[63,110,144],"which":[64],"hard":[66],"serve":[68],"efficiently":[69],"server.":[72],"This":[73],"article":[74],"presents":[75],"Graft\u2014an":[76],"efficient":[77,136],"serving":[79,196],"system":[80],"hybrid":[82],"with":[85,168,192],"service-level":[87],"objective":[88],"(SLO)":[89],"guarantees.":[90],"Our":[91,179],"main":[92],"insight":[93],"is":[94],"mitigate":[96],"non-uniformity":[98],"by":[99,187],"core":[101],"concept":[102],"called":[103],"re-alignment,":[105,124],"allowing":[106],"multiple":[107],"heterogeneous":[108],"be":[112],"restructured":[113],"share":[115],"layers.":[116],"To":[117],"fully":[118],"exploit":[119],"potential":[121],"Graft":[125,162,183],"employs":[126],"fine-grained":[127],"GPU":[128],"resource":[129,151,185],"sharing.":[130],"Based":[131],"that,":[133],"we":[134],"propose":[135],"algorithms":[137],"merging,":[139],"grouping,":[140],"re-aligning":[142],"maximize":[146],"request":[147],"batching":[148],"opportunities,":[149],"minimizing":[150],"consumption":[152],"while":[153],"guaranteeing":[154],"SLO.":[158],"We":[159],"implement":[160],"prototype":[163],"perform":[165],"extensive":[166],"experiments":[167],"five":[169],"types":[170],"used":[173],"DNNs":[174],"real-world":[176],"network":[177],"traces.":[178],"results":[180],"show":[181],"that":[182],"improves":[184],"efficiency":[186],"up":[188],"70%":[190],"compared":[191],"state-of-the-art":[194],"systems.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":13}],"updated_date":"2026-06-22T08:00:12.763002","created_date":"2025-10-10T00:00:00"}
