{"id":"https://openalex.org/W4417337172","doi":"https://doi.org/10.1109/tcsvt.2025.3643915","title":"UDMMColor: A Unified Diffusion Model for Multi-Modal Colorization","display_name":"UDMMColor: A Unified Diffusion Model for Multi-Modal Colorization","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W4417337172","doi":"https://doi.org/10.1109/tcsvt.2025.3643915"},"language":null,"primary_location":{"id":"doi:10.1109/tcsvt.2025.3643915","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3643915","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102508676","display_name":"Yan Zhai","orcid":"https://orcid.org/0000-0002-5074-4716"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Zhai","raw_affiliation_strings":["School of Computer and Cyber Sciences, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-5074-4716","affiliations":[{"raw_affiliation_string":"School of Computer and Cyber Sciences, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071155745","display_name":"Zerui Han","orcid":"https://orcid.org/0000-0002-7697-7796"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zerui Han","raw_affiliation_strings":["School of Information and Communication Engineering, Communication University of China, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019100110","display_name":"Zhulin Tao","orcid":"https://orcid.org/0000-0001-9011-8464"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhulin Tao","raw_affiliation_strings":["School of Information and Communication Engineering, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9011-8464","affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059973993","display_name":"Xianglin Huang","orcid":"https://orcid.org/0000-0003-0324-4687"},"institutions":[{"id":"https://openalex.org/I75689368","display_name":"Communication University of China","ror":"https://ror.org/04facbs33","country_code":"CN","type":"education","lineage":["https://openalex.org/I75689368"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianglin Huang","raw_affiliation_strings":["School of Computer and Cyber Sciences, Communication University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0324-4687","affiliations":[{"raw_affiliation_string":"School of Computer and Cyber Sciences, Communication University of China, Beijing, China","institution_ids":["https://openalex.org/I75689368"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004164569","display_name":"Jinshan Pan","orcid":"https://orcid.org/0000-0003-0304-9507"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinshan Pan","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-0304-9507","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035112538","display_name":"Jinhui Tang","orcid":"https://orcid.org/0000-0001-9008-222X"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinhui Tang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0001-9008-222X","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36002922,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"5","first_page":"6617","last_page":"6629"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9168000221252441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9168000221252441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.020800000056624413,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.004800000227987766,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.6159999966621399},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4481000006198883},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44780001044273376},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4275999963283539},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.40290001034736633},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3822999894618988},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.37700000405311584},{"id":"https://openalex.org/keywords/color-image","display_name":"Color image","score":0.375900000333786}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6796000003814697},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.6159999966621399},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6067000031471252},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.54339998960495},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4481000006198883},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44780001044273376},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3822999894618988},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.37700000405311584},{"id":"https://openalex.org/C142616399","wikidata":"https://www.wikidata.org/wiki/Q5148604","display_name":"Color image","level":4,"score":0.375900000333786},{"id":"https://openalex.org/C193536780","wikidata":"https://www.wikidata.org/wiki/Q1513153","display_name":"Edge detection","level":4,"score":0.36660000681877136},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.35920000076293945},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3296999931335449},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.30070000886917114},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C68710425","wikidata":"https://www.wikidata.org/wiki/Q5275442","display_name":"Diffusion process","level":3,"score":0.29190000891685486},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.2806999981403351},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27480000257492065},{"id":"https://openalex.org/C203504353","wikidata":"https://www.wikidata.org/wiki/Q4765461","display_name":"Anisotropic diffusion","level":3,"score":0.26429998874664307},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.26190000772476196}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2025.3643915","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3643915","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3940598769","display_name":null,"funder_award_id":"62332010","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5412215987","display_name":null,"funder_award_id":"CUC24QT19","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5786042436","display_name":null,"funder_award_id":"CUC24GF06","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6700167145","display_name":null,"funder_award_id":"CUC23CGJ24","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diffusion":[0],"model-based":[1],"networks":[2],"have":[3,14],"been":[4],"widely":[5],"applied":[6],"in":[7,20,135],"the":[8,26,38,44,49,115],"field":[9],"of":[10,28,40,52],"image":[11,21,132],"generation":[12],"and":[13,47,82,92,138],"gradually":[15],"demonstrated":[16],"a":[17,69,88],"strong":[18],"potential":[19],"colorization":[22,30,45,116,133,143],"tasks.":[23],"However,":[24],"despite":[25],"emergence":[27],"various":[29],"diffusion":[31,72],"models,":[32],"two":[33],"major":[34],"challenges":[35],"remain:":[36],"(1)":[37],"lack":[39],"effective":[41],"control":[42,58,98],"over":[43,99],"process":[46],"(2)":[48],"prevalent":[50],"issue":[51],"color":[53,100,120],"bleeding.":[54],"Integrating":[55],"suitable":[56],"conditional":[57],"can":[59],"effectively":[60,110],"alleviate":[61],"these":[62],"challenges.":[63],"To":[64],"this":[65],"end,":[66],"we":[67,86,103],"propose":[68],"unified":[70],"multi-modal":[71],"model":[73],"that":[74,90,127],"harnesses":[75],"diverse":[76],"modality":[77],"information":[78,113],"to":[79,109],"achieve":[80],"flexible":[81],"high-quality":[83],"colorization.":[84],"Specifically,":[85],"introduce":[87],"Stroke-Adapter":[89],"extracts":[91],"integrates":[93],"stroke":[94],"prompt,":[95],"enhancing":[96],"user":[97],"distribution.":[101],"Additionally,":[102],"design":[104],"an":[105],"Edge-Guided":[106],"Attention":[107],"mechanism":[108],"inject":[111],"edge":[112],"into":[114],"process,":[117],"significantly":[118],"reducing":[119],"bleeding":[121],"artifacts.":[122],"Extensive":[123],"comparative":[124],"experiments":[125],"demonstrate":[126],"our":[128],"method":[129],"outperforms":[130],"state-of-the-art":[131],"approaches":[134],"both":[136],"qualitative":[137],"quantitative":[139],"evaluations,":[140],"achieving":[141],"superior":[142],"results":[144],"with":[145],"enhanced":[146],"controllability.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-15T00:00:00"}
