{"id":"https://openalex.org/W7136377178","doi":"https://doi.org/10.48550/arxiv.2603.12773","title":"Empowering Semantic-Sensitive Underwater Image Enhancement with VLM","display_name":"Empowering Semantic-Sensitive Underwater Image Enhancement with VLM","publication_year":2026,"publication_date":"2026-03-13","ids":{"openalex":"https://openalex.org/W7136377178","doi":"https://doi.org/10.48550/arxiv.2603.12773"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.12773","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12773","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.12773","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129550892","display_name":"Guodong Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Guodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103447672","display_name":"Shengning Zhou","orcid":"https://orcid.org/0009-0001-8012-047X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Shengning","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014798392","display_name":"Genji Yuan","orcid":"https://orcid.org/0000-0002-8710-2266"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Genji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129631432","display_name":"Huiyu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Huiyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129418656","display_name":"Jingchun Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jingchun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129567365","display_name":"Jinjiang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jinjiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9714000225067139,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9714000225067139,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.013299999758601189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-restoration","display_name":"Image restoration","score":0.5184999704360962},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5103999972343445},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.5084999799728394},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.5076000094413757},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5034999847412109},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.49939998984336853},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4885999858379364},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.460999995470047},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.43709999322891235}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7204999923706055},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6520000100135803},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6323999762535095},{"id":"https://openalex.org/C106430172","wikidata":"https://www.wikidata.org/wiki/Q6002272","display_name":"Image restoration","level":4,"score":0.5184999704360962},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5103999972343445},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.5084999799728394},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.5076000094413757},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5034999847412109},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.49939998984336853},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4885999858379364},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.460999995470047},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.43709999322891235},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.40070000290870667},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3935999870300293},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.3765999972820282},{"id":"https://openalex.org/C55020928","wikidata":"https://www.wikidata.org/wiki/Q3813865","display_name":"Image quality","level":3,"score":0.36910000443458557},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.36629998683929443},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3490000069141388},{"id":"https://openalex.org/C98083399","wikidata":"https://www.wikidata.org/wiki/Q3246517","display_name":"Underwater","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.31679999828338623},{"id":"https://openalex.org/C3017601658","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image enhancement","level":3,"score":0.30730000138282776},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C123403432","wikidata":"https://www.wikidata.org/wiki/Q654068","display_name":"Visibility","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26159998774528503},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.25600001215934753},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.12773","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12773","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.12773","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.12773","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/14","display_name":"Life below water","score":0.4635956883430481}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"recent":[1],"years,":[2],"learning-based":[3],"underwater":[4],"image":[5,77,92,132],"enhancement":[6,37],"(UIE)":[7],"techniques":[8],"have":[9],"rapidly":[10],"evolved.":[11],"However,":[12],"distribution":[13],"shifts":[14],"between":[15],"high-quality":[16],"enhanced":[17],"outputs":[18],"and":[19,114,178,184],"natural":[20],"images":[21],"can":[22],"hinder":[23],"semantic":[24,97],"cue":[25],"extraction":[26],"for":[27],"downstream":[28],"vision":[29],"tasks,":[30,180],"thereby":[31,141],"limiting":[32],"the":[33,91,104,121,143],"adaptability":[34],"of":[35,71,146],"existing":[36],"models.":[38],"To":[39,62],"address":[40],"this":[41,43],"challenge,":[42],"work":[44],"proposes":[45],"a":[46,75,81,95,108,137],"new":[47],"learning":[48],"mechanism":[49],"that":[50,152],"leverages":[51],"Vision-Language":[52],"Models":[53],"(VLMs)":[54],"to":[55,93,123,158],"empower":[56],"UIE":[57,105,160],"models":[58],"with":[59],"semantic-sensitive":[60,129],"capabilities.":[61],"be":[63],"concrete,":[64],"our":[65,154],"strategy":[66,155],"first":[67],"generates":[68],"textual":[69],"descriptions":[70,88],"key":[72,147],"objects":[73],"from":[74],"degraded":[76],"via":[78],"VLMs.":[79],"Subsequently,":[80],"text-image":[82],"alignment":[83,117],"model":[84],"remaps":[85],"these":[86],"relevant":[87],"back":[89],"onto":[90],"produce":[94],"spatial":[96],"guidance":[98],"map.":[99],"This":[100,119],"map":[101],"then":[102],"steers":[103],"network":[106,122],"through":[107],"dual-guidance":[109],"mechanism,":[110],"which":[111],"combines":[112],"cross-attention":[113],"an":[115],"explicit":[116],"loss.":[118],"forces":[120],"focus":[124],"its":[125,182],"restorative":[126],"power":[127],"on":[128,166,176],"regions":[130],"during":[131],"reconstruction,":[133],"rather":[134],"than":[135],"pursuing":[136],"globally":[138],"uniform":[139],"improvement,":[140],"ensuring":[142],"faithful":[144],"restoration":[145],"object":[148],"features.":[149],"Experiments":[150],"confirm":[151],"when":[153],"is":[156],"applied":[157],"different":[159],"baselines,":[161],"significantly":[162],"boosts":[163],"their":[164,174],"performance":[165,175],"perceptual":[167],"quality":[168],"metrics":[169],"as":[170,172],"well":[171],"enhances":[173],"detection":[177],"segmentation":[179],"validating":[181],"effectiveness":[183],"adaptability.":[185]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-17T00:00:00"}
