{"id":"https://openalex.org/W4415708108","doi":"https://doi.org/10.1109/icme59968.2025.11209790","title":"Optimization of Multimodal Inputs Based on Diffusion Models: Zero-Shot Semantic Image Generation","display_name":"Optimization of Multimodal Inputs Based on Diffusion Models: Zero-Shot Semantic Image Generation","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708108","doi":"https://doi.org/10.1109/icme59968.2025.11209790"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209790","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209790","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083090202","display_name":"Leilei Wang","orcid":"https://orcid.org/0009-0003-5422-0877"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Leilei Wang","raw_affiliation_strings":["University of Science and Technology of China,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103148259","display_name":"Renjie Lu","orcid":"https://orcid.org/0009-0005-2060-5159"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renjie Lu","raw_affiliation_strings":["University of Science and Technology of China,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109731559","display_name":"Fengzhao Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fengzhao Sun","raw_affiliation_strings":["University of Science and Technology of China,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081283108","display_name":"Yunxiang Zhang","orcid":"https://orcid.org/0000-0001-5633-7786"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunxiang Zhang","raw_affiliation_strings":["University of Science and Technology of China,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048818071","display_name":"Jun Yu","orcid":"https://orcid.org/0000-0002-3197-8103"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["University of Science and Technology of China,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115604895","display_name":"Qingsong Liu","orcid":"https://orcid.org/0009-0004-1208-3031"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingsong Liu","raw_affiliation_strings":["Unisound AI Technology Co., Ltd.,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unisound AI Technology Co., Ltd.,Hefei,China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Sun Jianqing","orcid":null},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sun Jianqing","raw_affiliation_strings":["Unisound AI Technology Co., Ltd.,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unisound AI Technology Co., Ltd.,Hefei,China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5120192680","display_name":"Liang Jiaen","orcid":null},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Jiaen","raw_affiliation_strings":["Unisound AI Technology Co., Ltd.,Hefei,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Unisound AI Technology Co., Ltd.,Hefei,China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27091787,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8500000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.8500000238418579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.025200000032782555,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.02199999988079071,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7390999794006348},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6951000094413757},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.48820000886917114},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.43299999833106995},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.38920000195503235},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.37700000405311584},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.36890000104904175},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3603000044822693}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8004000186920166},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7390999794006348},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6951000094413757},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.619700014591217},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.48820000886917114},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.43299999833106995},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.38920000195503235},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.37700000405311584},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.36320000886917114},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3603000044822693},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35569998621940613},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.32420000433921814},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.31290000677108765},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2639000117778778}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209790","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209790","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2340897893","https://openalex.org/W2560023338","https://openalex.org/W2737258237","https://openalex.org/W2962914239","https://openalex.org/W2962974533","https://openalex.org/W2963800363","https://openalex.org/W3034785488","https://openalex.org/W3034954643","https://openalex.org/W4214868041","https://openalex.org/W4226178953","https://openalex.org/W4312433568","https://openalex.org/W4312815172","https://openalex.org/W4312933868","https://openalex.org/W4313148383","https://openalex.org/W4385237348","https://openalex.org/W4386065257","https://openalex.org/W4390873054"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,7,20,34,72,77,88,100,106,111,130,136,141,151,156,159,166,169],"continuous":[2],"advancement":[3],"of":[4,9,25,36,79,110,132],"large":[5],"models,":[6],"scale":[8],"data":[10,29],"has":[11],"become":[12],"increasingly":[13],"important":[14],"in":[15,150],"semantic":[16,27],"segmentation":[17,28],"tasks.":[18,81],"However,":[19],"complexity":[21],"and":[22,68,120],"high":[23],"cost":[24,78],"annotating":[26],"pose":[30],"significant":[31],"challenges":[32],"to":[33,41,56,75,86,98],"expansion":[35],"datasets.":[37],"This":[38,61],"study":[39],"aims":[40],"leverage":[42],"pre-trained":[43],"diffusion":[44],"generative":[45],"models":[46],"for":[47],"conditional":[48],"image":[49],"generation,":[50],"where":[51],"labeled":[52],"masks":[53],"are":[54],"used":[55],"generate":[57],"corresponding":[58],"synthetic":[59],"images.":[60],"ensures":[62],"a":[63,94,147],"direct":[64],"correspondence":[65],"between":[66],"input":[67,101],"output,":[69],"effectively":[70],"bypassing":[71],"annotation":[73],"stage":[74],"reduce":[76],"labor-intensive":[80],"We":[82],"employ":[83],"multimodal":[84,95],"conditions,":[85,103],"control":[87,102],"generation":[89,118,124,138],"results.":[90,113],"Additionally,":[91],"we":[92,115],"propose":[93],"alignment":[96],"scheme":[97],"optimize":[99],"thereby":[104],"improving":[105],"spatial":[107],"structural":[108],"accuracy":[109],"generated":[112],"Furthermore,":[114],"explore":[116],"zero-shot":[117,123,137],"tasks":[119],"successfully":[121],"achieve":[122],"performance":[125,160],"across":[126],"multiple":[127],"datasets,":[128],"demonstrating":[129],"effectiveness":[131],"our":[133,144],"approach.":[134],"In":[135],"experiments":[139],"on":[140,165],"Cityscapes":[142],"dataset,":[143,158,168],"method":[145],"achieved":[146],"0.6%":[148],"improvement":[149,161,170],"mIoU":[152],"evaluation":[153],"metric.":[154],"On":[155],"ADE20K":[157],"reached":[162],"2.52%,":[163],"while":[164],"COCO-Stuff":[167],"was":[171],"2.43%.":[172]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-30T00:00:00"}
