{"id":"https://openalex.org/W7154271505","doi":"https://doi.org/10.1109/tip.2026.3682126","title":"ViT-UWA: Vision Transformer Underwater-Adapter for Dense Predictions Beneath the Water Surface","display_name":"ViT-UWA: Vision Transformer Underwater-Adapter for Dense Predictions Beneath the Water Surface","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7154271505","doi":"https://doi.org/10.1109/tip.2026.3682126","pmid":"https://pubmed.ncbi.nlm.nih.gov/41979972"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2026.3682126","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2026.3682126","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013880628","display_name":"Yuheng Jia","orcid":"https://orcid.org/0000-0002-3907-6550"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuheng Jia","raw_affiliation_strings":["School of Computer Science and Engineering and the Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications, Ministry of Education, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-3907-6550","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering and the Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications, Ministry of Education, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133622574","display_name":"Qirui Lin","orcid":"https://orcid.org/0009-0007-1631-6914"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qirui Lin","raw_affiliation_strings":["College of Software Engineering and the Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications, Ministry of Education, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0007-1631-6914","affiliations":[{"raw_affiliation_string":"College of Software Engineering and the Key Laboratory of New Generation Artificial Intelligence Technology and Its Interdisciplinary Applications, Ministry of Education, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hua Li","orcid":"https://orcid.org/0000-0003-0740-0691"},"institutions":[{"id":"https://openalex.org/I20942203","display_name":"Hainan University","ror":"https://ror.org/03q648j11","country_code":"CN","type":"education","lineage":["https://openalex.org/I20942203"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hua Li","raw_affiliation_strings":["School of Computer Science and Technology, Hainan University, Haikou, China"],"raw_orcid":"https://orcid.org/0000-0003-0740-0691","affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yutong Li","orcid":"https://orcid.org/0009-0000-9425-1553"},"institutions":[{"id":"https://openalex.org/I20942203","display_name":"Hainan University","ror":"https://ror.org/03q648j11","country_code":"CN","type":"education","lineage":["https://openalex.org/I20942203"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yutong Li","raw_affiliation_strings":["School of Information and Communication Engineering, Hainan University, Haikou, China"],"raw_orcid":"https://orcid.org/0009-0000-9425-1553","affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, Hainan University, Haikou, China","institution_ids":["https://openalex.org/I20942203"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008386708","display_name":"Sam Kwong","orcid":"https://orcid.org/0000-0001-7484-7261"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sam Kwong","raw_affiliation_strings":["School of Data Science, Lingnan University, Hong Kong, SAR, China"],"raw_orcid":"https://orcid.org/0000-0001-7484-7261","affiliations":[{"raw_affiliation_string":"School of Data Science, Lingnan University, Hong Kong, SAR, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091558139","display_name":"Runmin Cong","orcid":"https://orcid.org/0000-0003-0972-4008"},"institutions":[{"id":"https://openalex.org/I154099455","display_name":"Shandong University","ror":"https://ror.org/0207yh398","country_code":"CN","type":"education","lineage":["https://openalex.org/I154099455"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runmin Cong","raw_affiliation_strings":["School of Control Science and Engineering, Shandong University, Jinan, China"],"raw_orcid":"https://orcid.org/0000-0003-0972-4008","affiliations":[{"raw_affiliation_string":"School of Control Science and Engineering, Shandong University, Jinan, China","institution_ids":["https://openalex.org/I154099455"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5013880628"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81085965,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":null,"first_page":"4012","last_page":"4026"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.5648000240325928,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.5648000240325928,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11192","display_name":"Underwater Vehicles and Communication Systems","score":0.12110000103712082,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12697","display_name":"Water Quality Monitoring Technologies","score":0.04490000009536743,"subfield":{"id":"https://openalex.org/subfields/2312","display_name":"Water Science and Technology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.3862999975681305},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.38199999928474426},{"id":"https://openalex.org/keywords/surface","display_name":"Surface (topology)","score":0.38089999556541443},{"id":"https://openalex.org/keywords/machine-vision","display_name":"Machine vision","score":0.3303000032901764},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.3118000030517578},{"id":"https://openalex.org/keywords/surface-fitting","display_name":"Surface fitting","score":0.2903999984264374}],"concepts":[{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3862999975681305},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.38199999928474426},{"id":"https://openalex.org/C2776799497","wikidata":"https://www.wikidata.org/wiki/Q484298","display_name":"Surface (topology)","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.37869998812675476},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.3407000005245209},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.3303000032901764},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3255999982357025},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.3118000030517578},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.3059000074863434},{"id":"https://openalex.org/C2984999661","wikidata":"https://www.wikidata.org/wiki/Q603159","display_name":"Surface fitting","level":3,"score":0.2903999984264374},{"id":"https://openalex.org/C118365302","wikidata":"https://www.wikidata.org/wiki/Q4817115","display_name":"Atmospheric model","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C20885615","wikidata":"https://www.wikidata.org/wiki/Q825595","display_name":"Surface reconstruction","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C84174578","wikidata":"https://www.wikidata.org/wiki/Q889796","display_name":"Surface wave","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C107365816","wikidata":"https://www.wikidata.org/wiki/Q114817","display_name":"Surface roughness","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C72293138","wikidata":"https://www.wikidata.org/wiki/Q909741","display_name":"Temperature measurement","level":2,"score":0.2533999979496002},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2026.3682126","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2026.3682126","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:41979972","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41979972","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/14","score":0.5550382137298584,"display_name":"Life below water"}],"awards":[{"id":"https://openalex.org/G1914386590","display_name":null,"funder_award_id":"62471278","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4873994067","display_name":null,"funder_award_id":"STG5/E-103/24-R","funder_id":"https://openalex.org/F4320321592","funder_display_name":"Research Grants Council, University Grants Committee"},{"id":"https://openalex.org/G5106885279","display_name":null,"funder_award_id":"62461018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6467542141","display_name":null,"funder_award_id":"U24A20322","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6610201358","display_name":null,"funder_award_id":"62576094","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321592","display_name":"Research Grants Council, University Grants Committee","ror":"https://ror.org/00djwmt25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision":[0,46],"Transformer":[1,47],"(ViT)":[2],"and":[3,33,53,90,138,154,168],"its":[4],"variants":[5],"have":[6],"witnessed":[7],"a":[8,101,108],"significant":[9],"success":[10],"in":[11,19,36,118],"computer":[12],"vision.":[13],"However,":[14],"their":[15],"performance":[16],"may":[17],"degrade":[18],"underwater":[20,29,37,58,80,96,145],"dense":[21,59,121,146],"prediction":[22,60,122,147],"tasks":[23],"due":[24],"to":[25,75,82,106],"challenges":[26],"like":[27],"complex":[28],"environments,":[30],"quality":[31],"degradation,":[32],"light":[34],"scattering":[35],"images.":[38,97],"To":[39],"solve":[40],"this":[41],"problem,":[42],"we":[43,68,99,130],"propose":[44,100],"the":[45,50,83,125,177],"Underwater-Adapter":[48],"(ViT-UWA),":[49],"first":[51,69],"detail-focused":[52,109],"adapted":[54],"ViT":[55,137],"backbone":[56],"for":[57],"tasks,":[61,148],"without":[62],"requiring":[63],"task-specific":[64],"pretraining.":[65],"In":[66],"ViT-UWA,":[67],"introduce":[70],"High-frequency":[71],"Components":[72],"Prior":[73],"(HFCP)":[74],"add":[76],"high-frequency":[77,93],"information":[78,94],"of":[79,95,120,179],"images":[81],"plain":[84],"ViT,":[85],"which":[86,114,175],"can":[87,115],"help":[88],"recover":[89],"capture":[91],"lost":[92],"Then,":[98],"Detail":[102],"Aware":[103],"Module":[104],"(DAM)":[105],"obtain":[107],"multi-scale":[110],"convolutional":[111],"feature":[112,133],"pyramid,":[113],"be":[116],"used":[117],"kinds":[119],"tasks.":[123],"Through":[124],"ViT-DAM":[126],"Cross":[127],"Fusion":[128],"(VDCF),":[129],"achieve":[131],"bidirectional":[132],"cross":[134],"fusion":[135],"between":[136],"DAM.":[139],"We":[140],"evaluate":[141],"ViT-UWA":[142],"on":[143,172],"multiple":[144],"including":[149],"semantic":[150],"segmentation,":[151,153],"instance":[152],"object":[155],"detection.":[156],"With":[157],"only":[158],"ImageNet-22K":[159],"pretraining,":[160],"our":[161,180],"ViT-UWA-B":[162],"yields":[163],"state-of-the-art":[164],"46.4":[165],"box":[166],"AP":[167,171],"44.2":[169],"mask":[170],"USIS10K":[173],"dataset,":[174],"demonstrates":[176],"superiority":[178],"method.":[181],"Our":[182],"code":[183],"is":[184],"available":[185],"at":[186],"https://github.com/Linqirui/ViT-UWA.":[187]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-15T00:00:00"}
