{"id":"https://openalex.org/W3116103263","doi":"https://doi.org/10.1109/tpds.2020.3047638","title":"E<sup>2</sup>bird: <u>E</u>nhanced <u>E</u>lastic <u>B</u>atch for <u>I</u>mproving <u>R</u>esponsiveness and Throughput of <u>D</u>eep Learning Services","display_name":"E<sup>2</sup>bird: <u>E</u>nhanced <u>E</u>lastic <u>B</u>atch for <u>I</u>mproving <u>R</u>esponsiveness and Throughput of <u>D</u>eep Learning Services","publication_year":2020,"publication_date":"2020-12-28","ids":{"openalex":"https://openalex.org/W3116103263","doi":"https://doi.org/10.1109/tpds.2020.3047638","mag":"3116103263"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2020.3047638","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2020.3047638","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008837660","display_name":"Weihao Cui","orcid":"https://orcid.org/0000-0002-6646-5260"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weihao Cui","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100377840","display_name":"Quan Chen","orcid":"https://orcid.org/0000-0001-5832-0347"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Quan Chen","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Shanghai Institute for Advanced Communication and Data Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Shanghai Institute for Advanced Communication and Data Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063596525","display_name":"Han Zhao","orcid":"https://orcid.org/0000-0002-1561-5329"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Han Zhao","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013903721","display_name":"Mengze Wei","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengze Wei","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088337560","display_name":"Xiaoxin Tang","orcid":"https://orcid.org/0000-0002-7404-2073"},"institutions":[{"id":"https://openalex.org/I181679659","display_name":"Shanghai University of Finance and Economics","ror":"https://ror.org/00wtvfq62","country_code":"CN","type":"education","lineage":["https://openalex.org/I181679659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoxin Tang","raw_affiliation_strings":["Department of Computer Science, Shanghai University of Finance and Economics, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Shanghai University of Finance and Economics, Shanghai, China","institution_ids":["https://openalex.org/I181679659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039318240","display_name":"Minyi Guo","orcid":"https://orcid.org/0000-0003-0034-2302"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minyi Guo","raw_affiliation_strings":["Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","Shanghai Institute for Advanced Communication and Data Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]},{"raw_affiliation_string":"Shanghai Institute for Advanced Communication and Data Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5008837660"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":1.374,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.8420311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"32","issue":"6","first_page":"1307","last_page":"1321"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7323137521743774},{"id":"https://openalex.org/keywords/quality-of-service","display_name":"Quality of service","score":0.5911123156547546},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5081539154052734},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4933094084262848},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.48169001936912537},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.47886893153190613},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4464360475540161},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.43595898151397705},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3854914605617523},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34469717741012573},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.25314798951148987},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22222572565078735},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.16978421807289124}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7323137521743774},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.5911123156547546},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5081539154052734},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4933094084262848},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.48169001936912537},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.47886893153190613},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4464360475540161},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.43595898151397705},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3854914605617523},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34469717741012573},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.25314798951148987},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22222572565078735},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.16978421807289124},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2020.3047638","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2020.3047638","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G129499350","display_name":null,"funder_award_id":"61632017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2465329715","display_name":null,"funder_award_id":"61832006","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7942366687","display_name":null,"funder_award_id":"62022057","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":61,"referenced_works":["https://openalex.org/W587794757","https://openalex.org/W1530262073","https://openalex.org/W1979527452","https://openalex.org/W2076063813","https://openalex.org/W2119144962","https://openalex.org/W2155893237","https://openalex.org/W2172654076","https://openalex.org/W2204075824","https://openalex.org/W2285660444","https://openalex.org/W2286365479","https://openalex.org/W2323909431","https://openalex.org/W2338908902","https://openalex.org/W2402144811","https://openalex.org/W2542459869","https://openalex.org/W2580688187","https://openalex.org/W2604514113","https://openalex.org/W2604787577","https://openalex.org/W2625231790","https://openalex.org/W2657126969","https://openalex.org/W2767650207","https://openalex.org/W2779888504","https://openalex.org/W2787998955","https://openalex.org/W2798291715","https://openalex.org/W2809420642","https://openalex.org/W2886885214","https://openalex.org/W2899771611","https://openalex.org/W2903202831","https://openalex.org/W2929502194","https://openalex.org/W2934853022","https://openalex.org/W2952562115","https://openalex.org/W2953384591","https://openalex.org/W2964108773","https://openalex.org/W2964299589","https://openalex.org/W2982157693","https://openalex.org/W2998218113","https://openalex.org/W3005664618","https://openalex.org/W3103894541","https://openalex.org/W4231332361","https://openalex.org/W4233628754","https://openalex.org/W4235357114","https://openalex.org/W4244330903","https://openalex.org/W4249935458","https://openalex.org/W4297808460","https://openalex.org/W4301239768","https://openalex.org/W4301361180","https://openalex.org/W4302296459","https://openalex.org/W6617368339","https://openalex.org/W6631660994","https://openalex.org/W6637151318","https://openalex.org/W6677580257","https://openalex.org/W6687619766","https://openalex.org/W6696004547","https://openalex.org/W6703652217","https://openalex.org/W6713134421","https://openalex.org/W6730956707","https://openalex.org/W6747759466","https://openalex.org/W6748645090","https://openalex.org/W6752199355","https://openalex.org/W6753846162","https://openalex.org/W6756040250","https://openalex.org/W6756729454"],"related_works":["https://openalex.org/W4375867731","https://openalex.org/W2136583354","https://openalex.org/W2111238207","https://openalex.org/W2055243143","https://openalex.org/W2760721665","https://openalex.org/W2611989081","https://openalex.org/W3008625068","https://openalex.org/W3128807919","https://openalex.org/W3176411177","https://openalex.org/W3035501883"],"abstract_inverted_index":{"We":[0],"aim":[1],"to":[2,23,62,187,196],"tackle":[3],"existing":[4],"problems":[5],"about":[6],"deep":[7,26,36,104],"learning":[8,37,105],"serving":[9,38,106],"on":[10,167],"GPUs":[11,18,60],"in":[12,42],"the":[13,16,49,57,71,76,83,87,129,133,151,162,180,192,201],"view":[14],"of":[15,48,59,78,86,111,132,147,183],"system.":[17],"have":[19,30],"been":[20],"widely":[21],"adopted":[22],"serve":[24],"online":[25,65],"learning-based":[27],"services":[28],"that":[29,51,70,108,173],"stringent":[31],"QoS(Quality-of-Service)":[32],"requirements.":[33],"However,":[34],"emerging":[35],"systems":[39],"often":[40],"result":[41],"poor":[43,72,88],"responsiveness":[44,89],"and":[45,55,75,90,120,136,190],"low":[46,91],"throughput":[47,193],"inferences":[50,159,184],"damage":[52],"user":[53],"experience":[54],"increase":[56],"number":[58],"required":[61],"host":[63],"an":[64,121,168],"service.":[66],"Our":[67,164],"investigation":[68],"shows":[69],"batching":[73,134],"operation":[74,135],"lack":[77],"data":[79,138],"transfer-computation":[80,139],"overlap":[81],"are":[82],"root":[84],"causes":[85],"throughput.":[92],"To":[93],"this":[94],"end,":[95],"we":[96],"propose":[97],"E":[98,174],"<sup":[99,175],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[100,176],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[101,177],"bird,":[102],"a":[103,112,116],"system":[107],"is":[109],"comprised":[110],"GPU-resident":[113],"memory":[114,126],"pool,":[115],"multi-granularity":[117],"inference":[118,142],"engine,":[119],"elastic":[122],"batch":[123,156],"scheduler.":[124],"The":[125,141,155],"pool":[127],"eliminates":[128],"unnecessary":[130],"waiting":[131],"enables":[137,144],"overlap.":[140],"engine":[143],"concurrent":[145],"execution":[146],"different":[148],"batches,":[149],"improving":[150],"GPU":[152],"resource":[153],"utilization.":[154],"scheduler":[157],"organizes":[158],"elasticallyto":[160],"guarantee":[161],"QoS.":[163],"experimental":[165],"results":[166],"Nvidia":[169],"Titan":[170],"RTXGPU":[171],"show":[172],"bird":[178],"reduces":[179],"response":[181],"latency":[182],"by":[185,194],"up":[186,195],"82.4":[188],"percent":[189,198],"improves":[191],"62.8":[197],"while":[199],"guaranteeing":[200],"QoS":[202],"target":[203],"compared":[204],"with":[205],"TensorFlow":[206],"Serving.":[207]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
