{"id":"https://openalex.org/W7123622058","doi":"https://doi.org/10.1145/3772052.3772206","title":"Understanding Diffusion Model Serving in Production: A Top-Down Analysis of Workload, Scheduling, and Resource Efficiency","display_name":"Understanding Diffusion Model Serving in Production: A Top-Down Analysis of Workload, Scheduling, and Resource Efficiency","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W7123622058","doi":"https://doi.org/10.1145/3772052.3772206"},"language":null,"primary_location":{"id":"doi:10.1145/3772052.3772206","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772206","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3772052.3772206","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033311175","display_name":"Yanying Lin","orcid":"https://orcid.org/0000-0002-4809-9543"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanying Lin","raw_affiliation_strings":["Shenzhen Institute of Advanced Integration Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China and University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Integration Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China and University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102653479","display_name":"Shuaipeng Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I3045169105","display_name":"Southern University of Science and Technology","ror":"https://ror.org/049tv2d57","country_code":"CN","type":"education","lineage":["https://openalex.org/I3045169105"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuaipeng Wu","raw_affiliation_strings":["Southern University of Science and Technology, Shenzhen, Guangdong, China, Shenzhen Institute of Advanced Integration Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China and AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Southern University of Science and Technology, Shenzhen, Guangdong, China, Shenzhen Institute of Advanced Integration Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China and AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I4210145761","https://openalex.org/I3045169105"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122921181","display_name":"Shutian Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shutian Luo","raw_affiliation_strings":["University of Virginia, Charlottesville, Virginia, USA"],"affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, Virginia, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084983727","display_name":"Hong Xu","orcid":"https://orcid.org/0000-0001-7874-4518"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hong Xu","raw_affiliation_strings":["The Chinese University of Hong Kong, Hongkong, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hongkong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064217355","display_name":"Haiying Shen","orcid":"https://orcid.org/0000-0002-7681-6255"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haiying Shen","raw_affiliation_strings":["University of Virginia, Charlottesville, Virginia, USA"],"affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, Virginia, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122932550","display_name":"Chong Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chong Ma","raw_affiliation_strings":["AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122940655","display_name":"Min Shen","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Shen","raw_affiliation_strings":["AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100420755","display_name":"Le Chen","orcid":"https://orcid.org/0000-0003-3789-2578"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Le Chen","raw_affiliation_strings":["AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122955994","display_name":"Chengzhong Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Chengzhong Xu","raw_affiliation_strings":["University of Macau, Macau, China"],"affiliations":[{"raw_affiliation_string":"University of Macau, Macau, China","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122992044","display_name":"Lin Qu","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Qu","raw_affiliation_strings":["AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"AIOS Team, Alibaba Group Inc, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122968621","display_name":"Kejiang Ye","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kejiang Ye","raw_affiliation_strings":["Shenzhen Institute of Advanced Integration Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institute of Advanced Integration Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5033311175"],"corresponding_institution_ids":["https://openalex.org/I4210145761"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.80806382,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"15"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.20239999890327454,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.20239999890327454,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.11050000041723251,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.10379999876022339,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.6383000016212463},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6057999730110168},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5952000021934509},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5310999751091003},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.5228000283241272},{"id":"https://openalex.org/keywords/resource-efficiency","display_name":"Resource efficiency","score":0.5085999965667725}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6557999849319458},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.6383000016212463},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6057999730110168},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5952000021934509},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5310999751091003},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.5228000283241272},{"id":"https://openalex.org/C2777958785","wikidata":"https://www.wikidata.org/wiki/Q17120940","display_name":"Resource efficiency","level":2,"score":0.5085999965667725},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.4147000014781952},{"id":"https://openalex.org/C2982969694","wikidata":"https://www.wikidata.org/wiki/Q7247859","display_name":"Production efficiency","level":2,"score":0.38589999079704285},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.33629998564720154},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3206999897956848},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.313400000333786},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3043999969959259},{"id":"https://openalex.org/C13736549","wikidata":"https://www.wikidata.org/wiki/Q4489420","display_name":"Industrial engineering","level":1,"score":0.2662999927997589},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.26339998841285706},{"id":"https://openalex.org/C2992770021","wikidata":"https://www.wikidata.org/wiki/Q7247850","display_name":"Production model","level":3,"score":0.2599000036716461}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772052.3772206","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772206","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3772052.3772206","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772206","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.6253771781921387}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":6,"referenced_works":["https://openalex.org/W3205898353","https://openalex.org/W4387321090","https://openalex.org/W4387321091","https://openalex.org/W4390873054","https://openalex.org/W4394892775","https://openalex.org/W4394998727"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,47,51],"comprehensive":[4],"analysis":[5,43],"of":[6,63],"diffusion":[7,26],"model":[8,27],"serving":[9,28],"challenges":[10,36],"in":[11],"production":[12,64],"cloud":[13],"environments.":[14],"We":[15],"examine":[16],"the":[17],"unique":[18],"computational":[19],"patterns":[20],"and":[21],"resource":[22],"requirements":[23],"that":[24],"distinguish":[25],"from":[29,37,50],"traditional":[30],"ML":[31],"workloads,":[32],"revealing":[33],"fundamental":[34],"systemlevel":[35],"their":[38],"multi-stage":[39],"pipeline":[40],"architectures.":[41],"Our":[42],"is":[44],"based":[45],"on":[46],"dataset":[48],"collected":[49],"commercial":[52],"image":[53],"generation":[54],"service":[55],"processing":[56],"3.5":[57],"million":[58],"requests":[59],"across":[60],"300+":[61],"GPUs":[62],"operation.":[65]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-01-14T00:00:00"}
