{"id":"https://openalex.org/W7151433620","doi":"https://doi.org/10.48550/arxiv.2604.04335","title":"GENSERVE: Efficient Co-Serving of Heterogeneous Diffusion Model Workloads","display_name":"GENSERVE: Efficient Co-Serving of Heterogeneous Diffusion Model Workloads","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W7151433620","doi":"https://doi.org/10.48550/arxiv.2604.04335"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.04335","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04335","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.04335","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133131342","display_name":"Fanjiang Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ye, Fanjiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047681423","display_name":"Zhangke Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhangke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133122962","display_name":"Xinrui Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Xinrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5117436418","display_name":"Ethan Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Ethan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133137594","display_name":"Russell Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Russell","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011837748","display_name":"Kaijian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Kaijian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133106255","display_name":"Jingwei Zuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuo, Jingwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102728074","display_name":"Desen Sun","orcid":"https://orcid.org/0000-0001-8630-7959"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Desen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133146699","display_name":"Ye Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Ye","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133129741","display_name":"Triston Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Triston","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133125078","display_name":"Myungjin Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Myungjin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133087606","display_name":"Arvind Krishnamurthy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krishnamurthy, Arvind","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133104716","display_name":"Yuke Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuke","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5133131342"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.14190000295639038,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.14190000295639038,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.12950000166893005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.10970000177621841,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6277999877929688},{"id":"https://openalex.org/keywords/predictability","display_name":"Predictability","score":0.5422999858856201},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5135999917984009},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5074999928474426},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.4999000132083893},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.427700012922287},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4172999858856201},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.39309999346733093},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.3831999897956848}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8396999835968018},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.6406999826431274},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6277999877929688},{"id":"https://openalex.org/C197640229","wikidata":"https://www.wikidata.org/wiki/Q2534066","display_name":"Predictability","level":2,"score":0.5422999858856201},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5135999917984009},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5074999928474426},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.4999000132083893},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.427700012922287},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4172999858856201},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.39309999346733093},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3831999897956848},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.35600000619888306},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.34940001368522644},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.34389999508857727},{"id":"https://openalex.org/C193702766","wikidata":"https://www.wikidata.org/wiki/Q1414548","display_name":"Concurrency","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.32850000262260437},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.3255999982357025},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3127000033855438},{"id":"https://openalex.org/C2780609101","wikidata":"https://www.wikidata.org/wiki/Q17156588","display_name":"Resource management (computing)","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.26429998874664307},{"id":"https://openalex.org/C68710425","wikidata":"https://www.wikidata.org/wiki/Q5275442","display_name":"Diffusion process","level":3,"score":0.2624000012874603},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.04335","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04335","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.04335","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04335","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.4261510670185089}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"models":[1],"have":[2],"emerged":[3],"as":[4],"the":[5,69,73,145,154],"prevailing":[6],"approach":[7],"for":[8,104],"text-to-image":[9],"(T2I)":[10],"and":[11,39,49,92,126],"text-to-video":[12],"(T2V)":[13],"generation,":[14],"yet":[15],"production":[16],"platforms":[17],"must":[18],"increasingly":[19],"serve":[20],"both":[21],"modalities":[22],"on":[23],"shared":[24],"GPU":[25],"clusters":[26],"while":[27],"meeting":[28],"stringent":[29],"latency":[30,50],"SLOs.":[31],"Co-serving":[32],"such":[33],"heterogeneous":[34],"workloads":[35],"is":[36,83,93],"challenging:":[37],"T2I":[38],"T2V":[40],"requests":[41],"exhibit":[42],"vastly":[43],"different":[44],"compute":[45],"demands,":[46],"parallelism":[47,122],"characteristics,":[48],"requirements,":[51],"leading":[52],"to":[53,76,151],"significant":[54],"SLO":[55,146],"violations":[56],"in":[57,88],"existing":[58],"serving":[59,78],"systems.":[60],"We":[61],"present":[62],"GENSERVE,":[63],"a":[64,100],"co-serving":[65],"system":[66],"that":[67,84,130,142],"leverages":[68],"inherent":[70],"predictability":[71],"of":[72],"diffusion":[74,85],"process":[75],"optimize":[77],"efficiency.":[79],"A":[80],"central":[81],"insight":[82],"inference":[86],"proceeds":[87],"discrete,":[89],"predictable":[90],"steps":[91],"naturally":[94],"preemptible":[95],"at":[96],"step":[97],"boundaries,":[98],"opening":[99],"new":[101],"design":[102],"space":[103],"heterogeneity-aware":[105],"resource":[106,111,133],"management.":[107],"GENSERVE":[108,143],"introduces":[109],"step-level":[110],"adaptation":[112],"through":[113],"three":[114],"coordinated":[115],"mechanisms:":[116],"intelligent":[117],"video":[118],"preemption,":[119],"elastic":[120],"sequence":[121],"with":[123],"dynamic":[124],"batching,":[125],"an":[127],"SLO-aware":[128],"scheduler":[129],"jointly":[131],"optimizes":[132],"allocation":[134],"across":[135,157],"all":[136],"concurrent":[137],"requests.":[138],"Experimental":[139],"results":[140],"show":[141],"improves":[144],"attainment":[147],"rate":[148],"by":[149],"up":[150],"44%":[152],"over":[153],"strongest":[155],"baseline":[156],"diverse":[158],"configurations.":[159]},"counts_by_year":[],"updated_date":"2026-04-08T06:07:18.267832","created_date":"2026-04-08T00:00:00"}
