{"id":"https://openalex.org/W7163526492","doi":"https://doi.org/10.23919/date69613.2026.11539390","title":"Toward Parallel Serving for Vision-Language Models via Modal Decoupling and Scheduling","display_name":"Toward Parallel Serving for Vision-Language Models via Modal Decoupling and Scheduling","publication_year":2026,"publication_date":"2026-04-20","ids":{"openalex":"https://openalex.org/W7163526492","doi":"https://doi.org/10.23919/date69613.2026.11539390"},"language":null,"primary_location":{"id":"doi:10.23919/date69613.2026.11539390","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date69613.2026.11539390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137844745","display_name":"Yijia Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I96852419","display_name":"Capital Normal University","ror":"https://ror.org/005edt527","country_code":"CN","type":"education","lineage":["https://openalex.org/I96852419"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yijia Yang","raw_affiliation_strings":["Capital Normal University,College of Information Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Capital Normal University,College of Information Engineering,Beijing,China","institution_ids":["https://openalex.org/I96852419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122997346","display_name":"Yubo Deng","orcid":null},"institutions":[{"id":"https://openalex.org/I96852419","display_name":"Capital Normal University","ror":"https://ror.org/005edt527","country_code":"CN","type":"education","lineage":["https://openalex.org/I96852419"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yubo Deng","raw_affiliation_strings":["Capital Normal University,College of Information Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Capital Normal University,College of Information Engineering,Beijing,China","institution_ids":["https://openalex.org/I96852419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026991293","display_name":"Y Q Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I96852419","display_name":"Capital Normal University","ror":"https://ror.org/005edt527","country_code":"CN","type":"education","lineage":["https://openalex.org/I96852419"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yida Wang","raw_affiliation_strings":["Capital Normal University,College of Information Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Capital Normal University,College of Information Engineering,Beijing,China","institution_ids":["https://openalex.org/I96852419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016606164","display_name":"Yuanchao Xu","orcid":"https://orcid.org/0000-0003-4165-9138"},"institutions":[{"id":"https://openalex.org/I96852419","display_name":"Capital Normal University","ror":"https://ror.org/005edt527","country_code":"CN","type":"education","lineage":["https://openalex.org/I96852419"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanchao Xu","raw_affiliation_strings":["Capital Normal University,College of Information Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Capital Normal University,College of Information Engineering,Beijing,China","institution_ids":["https://openalex.org/I96852419"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026823951","display_name":"Keni Qiu","orcid":"https://orcid.org/0000-0002-5851-777X"},"institutions":[{"id":"https://openalex.org/I96852419","display_name":"Capital Normal University","ror":"https://ror.org/005edt527","country_code":"CN","type":"education","lineage":["https://openalex.org/I96852419"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keni Qiu","raw_affiliation_strings":["Capital Normal University,College of Information Engineering,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Capital Normal University,College of Information Engineering,Beijing,China","institution_ids":["https://openalex.org/I96852419"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I96852419"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.80789389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.814300000667572,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.814300000667572,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.012500000186264515,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.011599999852478504,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6000000238418579},{"id":"https://openalex.org/keywords/decoupling","display_name":"Decoupling (probability)","score":0.46399998664855957},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.45329999923706055},{"id":"https://openalex.org/keywords/job-shop-scheduling","display_name":"Job shop scheduling","score":0.31949999928474426}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6366000175476074},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6000000238418579},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.46399998664855957},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.45329999923706055},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.37779998779296875},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.31949999928474426},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.295199990272522},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.27160000801086426},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2694000005722046},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25769999623298645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/date69613.2026.11539390","is_oa":false,"landing_page_url":"https://doi.org/10.23919/date69613.2026.11539390","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 Design, Automation &amp;amp; Test in Europe Conference (DATE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2560730294","https://openalex.org/W3112103703","https://openalex.org/W4327545654","https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4391094120","https://openalex.org/W4393857491","https://openalex.org/W4409060278","https://openalex.org/W4413144805","https://openalex.org/W4415797413","https://openalex.org/W7117706198","https://openalex.org/W7160195329"],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"have":[3],"demonstrated":[4],"strong":[5],"performance":[6],"in":[7],"tasks":[8],"such":[9],"as":[10],"image":[11],"captioning":[12],"and":[13,27,42,70,83,89,107,120],"visual":[14],"question":[15],"answering.":[16],"Under":[17],"mixed":[18],"workloads,":[19,111],"however,":[20],"the":[21],"differing":[22],"inference":[23],"pipelines":[24],"for":[25,57,97],"text-only":[26,102],"multimodal":[28,98],"requests":[29,99],"create":[30],"heterogeneity":[31],"that":[32,52,79,94],"existing":[33],"serving":[34,50,55],"systems":[35],"fail":[36],"to":[37,39,118,125],"optimize\u2014leading":[38],"high":[40],"latency":[41,122],"poor":[43],"fairness.":[44],"We":[45],"propose":[46],"Duet-Infer,":[47],"a":[48,75,91],"modality-aware":[49],"framework":[51],"enhances":[53],"single-GPU":[54],"efficiency":[56],"VLMs":[58],"through":[59],"three":[60],"key":[61],"contributions:":[62],"(i)":[63],"parallel":[64],"computation":[65],"enabled":[66],"by":[67,116,123],"preprocessing":[68],"parallelism":[69],"decoupled":[71],"vision-language":[72],"execution,":[73],"(ii)":[74],"shared":[76],"memory":[77],"manager":[78],"eliminates":[80],"weight":[81],"redundancy":[82],"supports":[84],"efficient":[85],"encoder":[86],"cache":[87],"sharing,":[88],"(iii)":[90],"fairness-aware":[92],"scheduler":[93],"reduces":[95,113],"delays":[96],"without":[100],"penalizing":[101],"ones.":[103],"Implemented":[104],"within":[105],"vLLM":[106],"evaluated":[108],"on":[109],"realistic":[110],"DuetInfer":[112],"P99":[114],"TTFT":[115],"up":[117,124],"33.7%":[119],"end-to-end":[121],"20%.":[126]},"counts_by_year":[],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2026-06-05T00:00:00"}
