{"id":"https://openalex.org/W4401510256","doi":"https://doi.org/10.3390/rs16162961","title":"VCC-DiffNet: Visual Conditional Control Diffusion Network for Remote Sensing Image Captioning","display_name":"VCC-DiffNet: Visual Conditional Control Diffusion Network for Remote Sensing Image Captioning","publication_year":2024,"publication_date":"2024-08-12","ids":{"openalex":"https://openalex.org/W4401510256","doi":"https://doi.org/10.3390/rs16162961"},"language":"en","primary_location":{"id":"doi:10.3390/rs16162961","is_oa":true,"landing_page_url":"https://doi.org/10.3390/rs16162961","pdf_url":null,"source":{"id":"https://openalex.org/S43295729","display_name":"Remote Sensing","issn_l":"2072-4292","issn":["2072-4292"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Remote Sensing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/rs16162961","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101639319","display_name":"Qimin Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qimin Cheng","raw_affiliation_strings":["School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan 430074, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan 430074, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111103473","display_name":"Yuqi Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuqi Xu","raw_affiliation_strings":["School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan 430074, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan 430074, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101795099","display_name":"Ziyang Huang","orcid":"https://orcid.org/0000-0003-2769-2759"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyang Huang","raw_affiliation_strings":["School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan 430074, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Information and Communications, Huazhong University of Science and Technology, Wuhan 430074, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101639319"],"corresponding_institution_ids":["https://openalex.org/I47720641"],"apc_list":{"value":2500,"currency":"CHF","value_usd":2707},"apc_paid":{"value":2500,"currency":"CHF","value_usd":2707},"fwci":1.0497,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.77682297,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"16","issue":"16","first_page":"2961","last_page":"2961"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6535532474517822},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.6246477365493774},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.49577102065086365},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38438305258750916},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37863993644714355},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.36248579621315},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.19716474413871765}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6535532474517822},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.6246477365493774},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.49577102065086365},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38438305258750916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37863993644714355},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.36248579621315},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.19716474413871765}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/rs16162961","is_oa":true,"landing_page_url":"https://doi.org/10.3390/rs16162961","pdf_url":null,"source":{"id":"https://openalex.org/S43295729","display_name":"Remote Sensing","issn_l":"2072-4292","issn":["2072-4292"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Remote Sensing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:531bc0f3d10c4be5ac90c21d794010dd","is_oa":true,"landing_page_url":"https://doaj.org/article/531bc0f3d10c4be5ac90c21d794010dd","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Remote Sensing, Vol 16, Iss 16, p 2961 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/rs16162961","is_oa":true,"landing_page_url":"https://doi.org/10.3390/rs16162961","pdf_url":null,"source":{"id":"https://openalex.org/S43295729","display_name":"Remote Sensing","issn_l":"2072-4292","issn":["2072-4292"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Remote Sensing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7599999904632568}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2123301721","https://openalex.org/W2510520237","https://openalex.org/W2565639579","https://openalex.org/W2603566245","https://openalex.org/W2779054585","https://openalex.org/W2911584214","https://openalex.org/W2920981979","https://openalex.org/W2963434219","https://openalex.org/W2973586224","https://openalex.org/W2979924880","https://openalex.org/W3034858767","https://openalex.org/W3035463379","https://openalex.org/W3042724941","https://openalex.org/W3100245404","https://openalex.org/W3174046981","https://openalex.org/W3194015448","https://openalex.org/W3205981128","https://openalex.org/W3210150990","https://openalex.org/W3213119051","https://openalex.org/W4206111836","https://openalex.org/W4211112734","https://openalex.org/W4220716465","https://openalex.org/W4283216168","https://openalex.org/W4292968451","https://openalex.org/W4310459386","https://openalex.org/W4375868765","https://openalex.org/W4385767932","https://openalex.org/W4385864244","https://openalex.org/W4386072307","https://openalex.org/W4391226826","https://openalex.org/W4391341588","https://openalex.org/W4391935992","https://openalex.org/W6682631176","https://openalex.org/W6797423251"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Pioneering":[0],"remote":[1,74],"sensing":[2,75],"image":[3,59,160],"captioning":[4,60],"(RSIC)":[5],"works":[6],"use":[7],"autoregressive":[8,182],"decoding":[9,53],"for":[10,97],"fluent":[11],"and":[12,20,82,204],"coherent":[13],"sentences":[14],"but":[15,64],"suffer":[16],"from":[17],"high":[18,21],"latency":[19],"computation":[22],"costs.":[23],"In":[24],"contrast,":[25],"non-autoregressive":[26,52,132,172],"approaches":[27],"improve":[28],"inference":[29],"speed":[30],"by":[31,69],"predicting":[32],"multiple":[33],"tokens":[34],"simultaneously,":[35],"though":[36],"at":[37],"the":[38,70,84,100,119,129,159],"cost":[39],"of":[40,46,73,124,128],"performance":[41],"due":[42],"to":[43,117,134,153],"a":[44,110,137,199],"lack":[45],"sequential":[47],"dependencies.":[48],"Recently,":[49],"diffusion":[50,95,130],"model-based":[51,131],"has":[54],"shown":[55],"promise":[56],"in":[57,184,192,197,202,208],"natural":[58],"with":[61,158],"iterative":[62],"refinement,":[63],"its":[65],"effectiveness":[66],"is":[67],"limited":[68],"intrinsic":[71],"characteristics":[72],"images,":[76],"which":[77],"complicate":[78],"robust":[79],"input":[80,127],"construction":[81],"affect":[83],"description":[85],"accuracy.":[86],"To":[87],"overcome":[88],"these":[89],"challenges,":[90],"we":[91,108,142],"propose":[92,109,143],"an":[93,144,189,194,205],"innovative":[94],"model":[96],"RSIC,":[98],"named":[99],"Visual":[101],"Conditional":[102],"Control":[103],"Diffusion":[104],"Network":[105],"(VCC-DiffNet).":[106],"Specifically,":[107],"Refined":[111],"Multi-scale":[112],"Feature":[113],"Extraction":[114],"(RMFE)":[115],"module":[116],"extract":[118],"discernible":[120],"visual":[121],"context":[122],"features":[123],"RSIs":[125],"as":[126],"decoder":[133],"conditionally":[135],"control":[136],"multi-step":[138],"denoising":[139],"process.":[140],"Furthermore,":[141],"Interactive":[145],"Enhanced":[146],"Decoder":[147],"(IE-Decoder)":[148],"utilizing":[149],"dual":[150],"image\u2013description":[151],"interactions":[152],"generate":[154],"descriptions":[155],"finely":[156],"aligned":[157],"content.":[161],"Experiments":[162],"conducted":[163],"on":[164],"four":[165],"representative":[166],"RSIC":[167],"datasets":[168],"demonstrate":[169],"that":[170],"our":[171],"VCC-DiffNet":[173],"performs":[174],"comparably":[175],"to,":[176],"or":[177],"even":[178],"better":[179],"than,":[180],"popular":[181],"baselines":[183],"classic":[185],"metrics,":[186],"achieving":[187],"around":[188],"8.22\u00d7":[190],"speedup":[191,196,201,207],"Sydney-Captions,":[193],"11.61\u00d7":[195],"UCM-Captions,":[198],"15.20\u00d7":[200],"RSICD,":[203],"8.13\u00d7":[206],"NWPU-Captions.":[209]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
