{"id":"https://openalex.org/W4410949490","doi":"https://doi.org/10.1109/tip.2025.3573471","title":"MCT-CCDiff: Context-Aware Contrastive Diffusion Model With Mediator-Bridging Cross-Modal Transformer for Image Change Captioning","display_name":"MCT-CCDiff: Context-Aware Contrastive Diffusion Model With Mediator-Bridging Cross-Modal Transformer for Image Change Captioning","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4410949490","doi":"https://doi.org/10.1109/tip.2025.3573471","pmid":"https://pubmed.ncbi.nlm.nih.gov/40456068"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2025.3573471","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3573471","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111228016","display_name":"Jinhong Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinhong Hu","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"raw_orcid":"https://orcid.org/0009-0009-4048-5827","affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089792276","display_name":"Guojin Zhong","orcid":"https://orcid.org/0000-0002-6054-3694"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guojin Zhong","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-6054-3694","affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100603557","display_name":"Jin Yuan","orcid":"https://orcid.org/0000-0002-9600-7789"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jin Yuan","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0002-9600-7789","affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101920502","display_name":"Wenbo Pan","orcid":"https://orcid.org/0000-0002-5527-2914"},"institutions":[{"id":"https://openalex.org/I4210126257","display_name":"CRRC (China)","ror":"https://ror.org/033g21894","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210126257"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbo Pan","raw_affiliation_strings":["CRRC Zhuzhou Institute Company Ltd., Zhuzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-5527-2914","affiliations":[{"raw_affiliation_string":"CRRC Zhuzhou Institute Company Ltd., Zhuzhou, China","institution_ids":["https://openalex.org/I4210126257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100459926","display_name":"Xiaoping Wang","orcid":"https://orcid.org/0000-0002-9466-2667"},"institutions":[{"id":"https://openalex.org/I16609230","display_name":"Hunan University","ror":"https://ror.org/05htk5m33","country_code":"CN","type":"education","lineage":["https://openalex.org/I16609230"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoping Wang","raw_affiliation_strings":["College of Computer Science and Electronic Engineering, Hunan University, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Electronic Engineering, Hunan University, Changsha, China","institution_ids":["https://openalex.org/I16609230"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08864734,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":null,"first_page":"3294","last_page":"3308"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7083542346954346},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6463222503662109},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6160587072372437},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5266711115837097},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.5045253038406372},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4938109815120697},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.46516090631484985},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4583754539489746},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4218292832374573},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.34814566373825073},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.22249189019203186},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11531105637550354}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7083542346954346},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6463222503662109},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6160587072372437},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5266711115837097},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.5045253038406372},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4938109815120697},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.46516090631484985},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4583754539489746},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4218292832374573},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34814566373825073},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.22249189019203186},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11531105637550354},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2025.3573471","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2025.3573471","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:40456068","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40456068","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.41999998688697815,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G8637866159","display_name":null,"funder_award_id":"62272157","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W1797268635","https://openalex.org/W1895577753","https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2194775991","https://openalex.org/W2561715562","https://openalex.org/W2896457183","https://openalex.org/W2949554687","https://openalex.org/W2962784628","https://openalex.org/W2963807318","https://openalex.org/W2964196083","https://openalex.org/W2970419734","https://openalex.org/W2971157635","https://openalex.org/W2988981892","https://openalex.org/W3034961030","https://openalex.org/W3108170342","https://openalex.org/W3140300848","https://openalex.org/W3155889037","https://openalex.org/W3159583533","https://openalex.org/W3175933895","https://openalex.org/W3176470992","https://openalex.org/W4206621053","https://openalex.org/W4225739172","https://openalex.org/W4308167501","https://openalex.org/W4309805488","https://openalex.org/W4312388283","https://openalex.org/W4321608137","https://openalex.org/W4362013208","https://openalex.org/W4366996284","https://openalex.org/W4377866427","https://openalex.org/W4382202677","https://openalex.org/W4382240730","https://openalex.org/W4382459038","https://openalex.org/W4385245566","https://openalex.org/W4386072307","https://openalex.org/W4386076323","https://openalex.org/W4386185600","https://openalex.org/W4387969123","https://openalex.org/W4390871964","https://openalex.org/W4391791458","https://openalex.org/W4395001637","https://openalex.org/W4401044003","https://openalex.org/W4401691645","https://openalex.org/W4402671623","https://openalex.org/W4402961822","https://openalex.org/W6638319203","https://openalex.org/W6678262379","https://openalex.org/W6679045638","https://openalex.org/W6682631176","https://openalex.org/W6766978945","https://openalex.org/W6778883912","https://openalex.org/W6779823529","https://openalex.org/W6788990321","https://openalex.org/W6796163713","https://openalex.org/W6798447524","https://openalex.org/W6810165261","https://openalex.org/W6838815585","https://openalex.org/W6844872470","https://openalex.org/W6846176957","https://openalex.org/W6846472937","https://openalex.org/W6846827642","https://openalex.org/W6851592950","https://openalex.org/W6852748377","https://openalex.org/W6852911440","https://openalex.org/W6855350031","https://openalex.org/W6859765280"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W2963177403","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2330246314","https://openalex.org/W2949522393","https://openalex.org/W4289422896"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,10,170,179],"diffusion":[3,137],"models":[4],"(DMs)":[5],"have":[6],"showcased":[7],"superior":[8],"capabilities":[9],"generating":[11,72],"images":[12],"and":[13,25,62,102,115],"text.":[14],"This":[15],"paper":[16],"first":[17],"introduces":[18,86],"DMs":[19],"for":[20,77,150],"image":[21],"change":[22],"captioning":[23],"(ICC)":[24],"proposes":[26],"a":[27,52,87,107,135,141,176],"novel":[28],"Context-aware":[29],"Contrastive":[30,55],"Diffusion":[31],"model":[32],"with":[33],"Mediator-bridging":[34,88],"Cross-modal":[35,89],"Transformer":[36,90],"(MCT-CCDiff)":[37],"to":[38,65,93,124,146],"accurately":[39,80],"predict":[40,81],"visual":[41,82,100,113,184],"difference":[42,83,185],"descriptions":[43],"conditioned":[44],"on":[45,157],"two":[46],"similar":[47],"images.":[48],"Technically,":[49],"MCT-CCDiff":[50,85,165],"develops":[51],"Text":[53],"Embedding":[54],"Loss":[56],"(TECL)":[57],"that":[58,163],"leverages":[59],"both":[60],"positive":[61],"negative":[63],"samples":[64],"more":[66,73],"effectively":[67],"distinguish":[68],"text":[69,75,104,152],"embeddings,":[70],"thus":[71],"discriminative":[74],"representations":[76],"ICC.":[78],"To":[79],"descriptions,":[84],"(MCTrans)":[91],"designed":[92],"efficiently":[94],"explore":[95],"the":[96,127,171,180],"cross-modal":[97],"correlations":[98],"between":[99],"differences":[101],"corresponding":[103],"by":[105,134],"using":[106],"lightweight":[108],"mediator,":[109],"mitigating":[110],"interference":[111],"from":[112],"redundancy":[114],"reducing":[116],"interaction":[117],"overhead.":[118],"Additionally,":[119],"it":[120],"incorporates":[121],"context-augmented":[122],"denoising":[123],"further":[125],"understand":[126],"contextual":[128],"relationships":[129],"within":[130],"caption":[131],"words":[132],"implemented":[133],"revised":[136],"loss,":[138],"which":[139],"provides":[140],"tighter":[142],"optimization":[143,148],"bound,":[144],"leading":[145],"enhanced":[147],"effects":[149],"high-quality":[151],"generation.":[153],"Extensive":[154],"experiments":[155],"conducted":[156],"four":[158],"benchmark":[159],"datasets":[160],"clearly":[161],"demonstrate":[162],"our":[164],"significantly":[166],"outperforms":[167],"state-of-the-art":[168],"methods":[169],"field":[172],"of":[173,182],"ICC,":[174],"marking":[175],"notable":[177],"advancement":[178],"generation":[181],"precise":[183],"descriptions.":[186]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
