{"id":"https://openalex.org/W7118945935","doi":"https://doi.org/10.1109/tcsvt.2026.3651269","title":"Da Yu: Toward ASV-Based Image Captioning for Waterway Surveillance and Scene Understanding","display_name":"Da Yu: Toward ASV-Based Image Captioning for Waterway Surveillance and Scene Understanding","publication_year":2026,"publication_date":"2026-01-06","ids":{"openalex":"https://openalex.org/W7118945935","doi":"https://doi.org/10.1109/tcsvt.2026.3651269"},"language":null,"primary_location":{"id":"doi:10.1109/tcsvt.2026.3651269","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2026.3651269","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079695819","display_name":"Runwei Guan","orcid":"https://orcid.org/0000-0003-4013-2107"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Runwei Guan","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0003-4013-2107","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104264340","display_name":"Ningwei Ouyang","orcid":null},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ningwei Ouyang","raw_affiliation_strings":["School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060953814","display_name":"T. T. Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Tianhao Xu","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":"https://orcid.org/0009-0005-4809-0943","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122005449","display_name":"Shaofeng Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I4210162190","display_name":"China University of Petroleum, East China","ror":"https://ror.org/05gbn2817","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210162190"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaofeng Liang","raw_affiliation_strings":["Qingdao Institute of Software, China University of Petroleum (East China), Qingdao, China"],"raw_orcid":"https://orcid.org/0009-0005-8134-2188","affiliations":[{"raw_affiliation_string":"Qingdao Institute of Software, China University of Petroleum (East China), Qingdao, China","institution_ids":["https://openalex.org/I4210162190","https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113526553","display_name":"Wei Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I69356397","display_name":"Xi\u2019an Jiaotong-Liverpool University","ror":"https://ror.org/03zmrmn05","country_code":"CN","type":"education","lineage":["https://openalex.org/I69356397"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Dai","raw_affiliation_strings":["School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China"],"raw_orcid":"https://orcid.org/0009-0008-9877-6363","affiliations":[{"raw_affiliation_string":"School of Advanced Technology, Xi&#x2019;an Jiaotong-Liverpool University, Suzhou, China","institution_ids":["https://openalex.org/I69356397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100841185","display_name":"Yafeng Sun","orcid":"https://orcid.org/0000-0002-2043-2949"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yafeng Sun","raw_affiliation_strings":["School of Cyberspace Security, University of Science and Technology of China, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Cyberspace Security, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102085443","display_name":"S. S. Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Shang Gao","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0002-9597-6350","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086484127","display_name":"Songning Lai","orcid":"https://orcid.org/0009-0007-3132-9414"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Songning Lai","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":"https://orcid.org/0009-0007-3132-9414","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017442556","display_name":"Shanliang Yao","orcid":"https://orcid.org/0000-0001-7596-3598"},"institutions":[{"id":"https://openalex.org/I4210157719","display_name":"Yancheng Institute of Technology","ror":"https://ror.org/04y8njc86","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210157719"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanliang Yao","raw_affiliation_strings":["School of Information Engineering, Yancheng Institute of Technology, Yancheng, China"],"raw_orcid":"https://orcid.org/0000-0001-7596-3598","affiliations":[{"raw_affiliation_string":"School of Information Engineering, Yancheng Institute of Technology, Yancheng, China","institution_ids":["https://openalex.org/I4210157719"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101977364","display_name":"Xinyue Hu","orcid":"https://orcid.org/0009-0007-6598-3764"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Xuming Hu","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061907283","display_name":"Ryan Wen Liu","orcid":"https://orcid.org/0000-0002-1591-5583"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ryan Wen Liu","raw_affiliation_strings":["School of Navigation, Wuhan University of Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-1591-5583","affiliations":[{"raw_affiliation_string":"School of Navigation, Wuhan University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121443082","display_name":"Yutao Yue","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yutao Yue","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0003-4532-0924","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028772576","display_name":"Hui Hua Xiong","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]},{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hui Xiong","raw_affiliation_strings":["The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China"],"raw_orcid":"https://orcid.org/0000-0001-6016-6465","affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology (Guangzhou), Guangdong, China","institution_ids":["https://openalex.org/I200769079","https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":13,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":35.3848,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.99278266,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"36","issue":"5","first_page":"6168","last_page":"6183"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9681000113487244,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9681000113487244,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.013700000010430813,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0017999999690800905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.9671000242233276},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6154000163078308},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5950000286102295},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.49149999022483826},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41990000009536743},{"id":"https://openalex.org/keywords/skyline","display_name":"Skyline","score":0.4050999879837036},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.3546000123023987},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.35339999198913574}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.9671000242233276},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7799999713897705},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6154000163078308},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5950000286102295},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.49149999022483826},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48820000886917114},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41990000009536743},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.40939998626708984},{"id":"https://openalex.org/C2780757406","wikidata":"https://www.wikidata.org/wiki/Q465837","display_name":"Skyline","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3716999888420105},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3546000123023987},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.3456999957561493},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.314300000667572},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.296099990606308},{"id":"https://openalex.org/C136643341","wikidata":"https://www.wikidata.org/wiki/Q1361526","display_name":"Reachability","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2946000099182129},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2913999855518341},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.2906000018119812},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2743000090122223},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.260699987411499},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.25780001282691956}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2026.3651269","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2026.3651269","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.47023558616638184}],"awards":[{"id":"https://openalex.org/G1782397203","display_name":null,"funder_award_id":"92370204","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2589200640","display_name":null,"funder_award_id":"2023YFF0725001","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5392888755","display_name":null,"funder_award_id":"52422111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6130982387","display_name":null,"funder_award_id":"2023B1515120057","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326667","display_name":"Bureau of Education of Guangzhou Municipality","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Automated":[0],"waterway":[1,22,41,84],"environment":[2],"perception":[3,23,30,44],"is":[4,193],"crucial":[5],"for":[6,83,98,128,150],"enabling":[7],"unmanned":[8],"surface":[9],"vessels":[10],"(USVs)":[11],"to":[12,37,49,74,166],"understand":[13],"their":[14],"surroundings":[15],"and":[16,46,59,101,153,179,186],"make":[17],"informed":[18],"decisions.":[19],"Most":[20],"existing":[21],"models":[24,47,68,183],"primarily":[25],"focus":[26],"on":[27,88,184],"instance-level":[28],"object":[29],"paradigms":[31],"(e.g.,":[32],"detection,":[33],"segmentation).":[34],"However,":[35],"due":[36],"the":[38,64,77,148,163],"complexity":[39],"of":[40,54,66,157],"environments,":[42],"current":[43],"datasets":[45],"fail":[48],"achieve":[50],"global":[51,152],"semantic":[52],"understanding":[53],"waterways,":[55],"limiting":[56],"large-scale":[57],"monitoring":[58],"structured":[60],"log":[61],"generation.":[62],"With":[63],"advancement":[65],"vision-language":[67],"(VLMs),":[69],"we":[70,118,131],"leverage":[71],"image":[72],"captioning":[73,79,189],"introduce":[75],"WaterCaption,":[76],"first":[78],"dataset":[80],"specifically":[81],"designed":[82],"environments.":[85],"WaterCaption":[86,185],"focuses":[87],"fine-grained,":[89],"multi-region":[90],"long-text":[91],"descriptions,":[92],"providing":[93],"a":[94,133],"new":[95],"research":[96],"direction":[97],"visual":[99,158],"geo-understanding":[100],"spatial":[102],"scene":[103],"cognition.":[104],"Exactly,":[105],"it":[106],"includes":[107],"20.2k":[108],"image-text":[109],"pair":[110],"data":[111],"with":[112,147],"1.8":[113],"million":[114],"vocabulary":[115],"size.":[116],"Additionally,":[117],"propose":[119,132],"Da":[120,171],"Yu,":[121],"an":[122,174],"edge-deployable":[123],"multi-modal":[124],"large":[125],"language":[126],"model":[127],"USVs,":[129],"where":[130],"novel":[134],"vision-to-language":[135],"projector":[136],"called":[137],"Nano":[138],"Transformer":[139],"Adaptor":[140],"(NTA).":[141],"NTA":[142],"effectively":[143],"balances":[144],"computational":[145],"efficiency":[146],"capacity":[149],"both":[151],"fine-grained":[154],"local":[155],"modeling":[156],"features,":[159],"thereby":[160],"significantly":[161],"enhancing":[162],"model\u2019s":[164],"ability":[165],"generate":[167],"long-form":[168],"textual":[169],"outputs.":[170],"Yu":[172],"achieves":[173],"optimal":[175],"balance":[176],"between":[177],"performance":[178],"efficiency,":[180],"surpassing":[181],"state-of-the-art":[182],"several":[187],"other":[188],"benchmarks.":[190],"The":[191],"project":[192],"available":[194],"at":[195],"https://github.com/GuanRunwei/WaterCaption.":[196]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-08T00:00:00"}
