{"id":"https://openalex.org/W4390539794","doi":"https://doi.org/10.3390/rs16010196","title":"Cross-Modal Retrieval and Semantic Refinement for Remote Sensing Image Captioning","display_name":"Cross-Modal Retrieval and Semantic Refinement for Remote Sensing Image Captioning","publication_year":2024,"publication_date":"2024-01-03","ids":{"openalex":"https://openalex.org/W4390539794","doi":"https://doi.org/10.3390/rs16010196"},"language":"en","primary_location":{"id":"doi:10.3390/rs16010196","is_oa":true,"landing_page_url":"https://doi.org/10.3390/rs16010196","pdf_url":"https://www.mdpi.com/2072-4292/16/1/196/pdf?version=1704249166","source":{"id":"https://openalex.org/S43295729","display_name":"Remote Sensing","issn_l":"2072-4292","issn":["2072-4292"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Remote Sensing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2072-4292/16/1/196/pdf?version=1704249166","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103153352","display_name":"Zhengxin Li","orcid":"https://orcid.org/0009-0002-8988-4545"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210137199","display_name":"Aerospace Information Research Institute","ror":"https://ror.org/0419fj215","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210137199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengxin Li","raw_affiliation_strings":["Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing 101408, China","The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing 101408, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210137199","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080918639","display_name":"Wenzhe Zhao","orcid":"https://orcid.org/0000-0002-5130-7219"},"institutions":[{"id":"https://openalex.org/I4210137199","display_name":"Aerospace Information Research Institute","ror":"https://ror.org/0419fj215","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210137199"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenzhe Zhao","raw_affiliation_strings":["Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210137199","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114108004","display_name":"Xuanyi Du","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210137199","display_name":"Aerospace Information Research Institute","ror":"https://ror.org/0419fj215","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210137199"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanyi Du","raw_affiliation_strings":["School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing 101408, China","The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic, Electrical and Communication Engineering, University of Chinese Academy of Sciences, Beijing 101408, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210137199","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103115656","display_name":"Guangyao Zhou","orcid":"https://orcid.org/0009-0003-7906-5498"},"institutions":[{"id":"https://openalex.org/I4210137199","display_name":"Aerospace Information Research Institute","ror":"https://ror.org/0419fj215","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210137199"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangyao Zhou","raw_affiliation_strings":["Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210137199","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100690355","display_name":"Songlin Zhang","orcid":"https://orcid.org/0000-0002-0554-6737"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210137199","display_name":"Aerospace Information Research Institute","ror":"https://ror.org/0419fj215","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210137199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songlin Zhang","raw_affiliation_strings":["Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Spatial Information Processing and Application System Technology, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I19820366"]},{"raw_affiliation_string":"The Aerospace Information Research Institute, Chinese Academy of Sciences, Beijing 100190, China","institution_ids":["https://openalex.org/I4210137199","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080918639"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210137199"],"apc_list":{"value":2500,"currency":"CHF","value_usd":2707},"apc_paid":{"value":2500,"currency":"CHF","value_usd":2707},"fwci":3.1795,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.92549918,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"16","issue":"1","first_page":"196","last_page":"196"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.979200005531311,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8981506824493408},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8584848642349243},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5568236708641052},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.44859305024147034},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4350247085094452},{"id":"https://openalex.org/keywords/semantic-mapping","display_name":"Semantic mapping","score":0.42790770530700684},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4174913465976715},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3325866162776947},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2397899627685547}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8981506824493408},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8584848642349243},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5568236708641052},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.44859305024147034},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4350247085094452},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.42790770530700684},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4174913465976715},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3325866162776947},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2397899627685547},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/rs16010196","is_oa":true,"landing_page_url":"https://doi.org/10.3390/rs16010196","pdf_url":"https://www.mdpi.com/2072-4292/16/1/196/pdf?version=1704249166","source":{"id":"https://openalex.org/S43295729","display_name":"Remote Sensing","issn_l":"2072-4292","issn":["2072-4292"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Remote Sensing","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:e6cc83e17bfc4cc4937843bf9269ae6d","is_oa":true,"landing_page_url":"https://doaj.org/article/e6cc83e17bfc4cc4937843bf9269ae6d","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Remote Sensing, Vol 16, Iss 1, p 196 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/rs16010196","is_oa":true,"landing_page_url":"https://doi.org/10.3390/rs16010196","pdf_url":"https://www.mdpi.com/2072-4292/16/1/196/pdf?version=1704249166","source":{"id":"https://openalex.org/S43295729","display_name":"Remote Sensing","issn_l":"2072-4292","issn":["2072-4292"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Remote Sensing","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5600000023841858}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390539794.pdf"},"referenced_works_count":50,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1956340063","https://openalex.org/W1980038761","https://openalex.org/W2064675550","https://openalex.org/W2086866337","https://openalex.org/W2101105183","https://openalex.org/W2110485445","https://openalex.org/W2123301721","https://openalex.org/W2506483933","https://openalex.org/W2510520237","https://openalex.org/W2603566245","https://openalex.org/W2745461083","https://openalex.org/W2761711448","https://openalex.org/W2779054585","https://openalex.org/W2896348597","https://openalex.org/W2920981979","https://openalex.org/W2925311845","https://openalex.org/W2979924880","https://openalex.org/W2993313557","https://openalex.org/W3006487741","https://openalex.org/W3011916860","https://openalex.org/W3015625772","https://openalex.org/W3017628311","https://openalex.org/W3038038411","https://openalex.org/W3046260628","https://openalex.org/W3097754216","https://openalex.org/W3100245404","https://openalex.org/W3117344638","https://openalex.org/W3135367836","https://openalex.org/W3138136606","https://openalex.org/W3154766321","https://openalex.org/W3194015448","https://openalex.org/W3196922338","https://openalex.org/W3213119051","https://openalex.org/W4206111836","https://openalex.org/W4214587440","https://openalex.org/W4293508747","https://openalex.org/W4294437149","https://openalex.org/W4303444943","https://openalex.org/W4312389717","https://openalex.org/W4317433994","https://openalex.org/W4362465355","https://openalex.org/W4377079834","https://openalex.org/W4385864244","https://openalex.org/W4386003541","https://openalex.org/W6631190155","https://openalex.org/W6725318829","https://openalex.org/W6794197716","https://openalex.org/W6800139171","https://openalex.org/W6806711163"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W3088136942","https://openalex.org/W2949362007","https://openalex.org/W2775506363","https://openalex.org/W4290852288","https://openalex.org/W4388893791","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W2949522393"],"abstract_inverted_index":{"Two-stage":[0],"remote":[1,15,54,63],"sensing":[2,16,55,64],"image":[3,175],"captioning":[4,78,129],"(RSIC)":[5],"methods":[6,28],"have":[7],"achieved":[8],"promising":[9],"results":[10],"by":[11,39],"incorporating":[12],"additional":[13],"pre-trained":[14,35],"tasks":[17,65],"to":[18,43,103,153,170],"extract":[19],"supplementary":[20,125,180],"information":[21,126,157,181],"and":[22,49,74,90,158,190,195,224],"improve":[23],"caption":[24,202],"quality.":[25],"However,":[26],"these":[27,82,113],"face":[29],"limitations":[30],"in":[31,53,70,112],"semantic":[32,91,121,142,149,162,188],"comprehension,":[33],"as":[34,119],"detectors/classifiers":[36],"are":[37,116,193],"constrained":[38],"predefined":[40],"labels,":[41],"leading":[42],"an":[44],"oversight":[45],"of":[46,61,107,136,174,211],"the":[47,59,77,128,134,137,147,172,178,186,209,220,222,225],"intricate":[48],"diverse":[50],"details":[51],"present":[52],"images":[56],"(RSIs).":[57],"Additionally,":[58],"handling":[60],"auxiliary":[62],"separately":[66],"can":[67],"introduce":[68,140],"challenges":[69],"ensuring":[71],"seamless":[72],"integration":[73],"alignment":[75],"with":[76,182],"process.":[79,130],"To":[80,131],"address":[81],"problems,":[83],"we":[84,97,139,207],"propose":[85],"a":[86,99,141,198],"novel":[87],"cross-modal":[88,100,199],"retrieval":[89,101],"refinement":[92,143],"(CRSR)":[93],"RSIC":[94],"method.":[95],"Specifically,":[96],"employ":[98],"model":[102],"retrieve":[104],"relevant":[105],"sentences":[106,115],"each":[108],"image.":[109],"The":[110],"words":[111],"retrieved":[114,179],"then":[117],"considered":[118],"primary":[120,148],"information,":[122,150],"providing":[123],"valuable":[124],"for":[127,201],"further":[132],"enhance":[133],"quality":[135],"captions,":[138],"module":[144],"that":[145],"refines":[146],"which":[151],"helps":[152],"filter":[154],"out":[155],"misleading":[156],"emphasize":[159],"visually":[160],"salient":[161],"information.":[163],"A":[164],"Transformer":[165],"Mapper":[166],"network":[167],"is":[168],"introduced":[169],"expand":[171],"representation":[173],"features":[176,192],"beyond":[177],"learnable":[183],"queries.":[184],"Both":[185],"refined":[187],"tokens":[189],"visual":[191],"integrated":[194],"fed":[196],"into":[197],"decoder":[200],"generation.":[203],"Through":[204],"extensive":[205],"experiments,":[206],"demonstrate":[208],"superiority":[210],"our":[212],"CRSR":[213],"method":[214],"over":[215],"existing":[216],"state-of-the-art":[217],"approaches":[218],"on":[219],"RSICD,":[221],"UCM-Captions,":[223],"Sydney-Captions":[226],"datasets":[227]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":4}],"updated_date":"2026-01-22T23:29:09.771500","created_date":"2025-10-10T00:00:00"}
