{"id":"https://openalex.org/W7160833323","doi":"https://doi.org/10.48550/arxiv.2605.07146","title":"UniV2D: Bridging Visual Restoration and Semantic Perception for Underwater Salient Object Detection","display_name":"UniV2D: Bridging Visual Restoration and Semantic Perception for Underwater Salient Object Detection","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160833323","doi":"https://doi.org/10.48550/arxiv.2605.07146"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07146","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072913037","display_name":"Laibin Chang","orcid":"https://orcid.org/0000-0002-6510-4359"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Laibin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101794320","display_name":"Shaodong Wang","orcid":"https://orcid.org/0000-0002-7982-6600"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shaodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135892131","display_name":"Yunke Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yunke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135903075","display_name":"Xu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135827695","display_name":"Kui Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Kui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135834859","display_name":"Chang Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135879753","display_name":"Bo Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Bo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.6650000214576721,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.6650000214576721,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.2387000024318695,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.05480000004172325,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6812999844551086},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.657800018787384},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5432000160217285},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5310999751091003},{"id":"https://openalex.org/keywords/image-restoration","display_name":"Image restoration","score":0.47600001096725464},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.4535999894142151},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.44929999113082886},{"id":"https://openalex.org/keywords/underwater","display_name":"Underwater","score":0.4341999888420105},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.41909998655319214}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7534999847412109},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6812999844551086},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.657800018787384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6442000269889832},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5432000160217285},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5343999862670898},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5310999751091003},{"id":"https://openalex.org/C106430172","wikidata":"https://www.wikidata.org/wiki/Q6002272","display_name":"Image restoration","level":4,"score":0.47600001096725464},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.4535999894142151},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.44929999113082886},{"id":"https://openalex.org/C98083399","wikidata":"https://www.wikidata.org/wiki/Q3246517","display_name":"Underwater","level":2,"score":0.4341999888420105},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.41909998655319214},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.35040000081062317},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.32829999923706055},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.32280001044273376},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.28029999136924744},{"id":"https://openalex.org/C75608658","wikidata":"https://www.wikidata.org/wiki/Q44395","display_name":"Pascal (unit)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2655999958515167},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07146","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07146","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8390311002731323,"id":"https://metadata.un.org/sdg/14","display_name":"Life below water"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Underwater":[0],"salient":[1,85],"object":[2,86],"detection":[3,60,87],"(USOD)":[4],"plays":[5],"a":[6,33,75,89,107,131,138,147,156,192],"vital":[7],"role":[8],"in":[9,185],"marine":[10],"vision":[11],"tasks":[12],"but":[13],"remains":[14],"fundamentally":[15],"challenging":[16],"due":[17],"to":[18,48,141,151,166],"severe":[19],"visual":[20,40,82,122],"degradation,":[21],"such":[22],"as":[23],"selective":[24],"absorption":[25],"and":[26,61,84,188],"medium":[27],"scattering.":[28],"Conventional":[29],"pipelines":[30,100],"typically":[31],"adopt":[32],"sequential":[34,70],"\"enhance-then-detect\"":[35],"paradigm.":[36],"However,":[37],"isolating":[38],"low-level":[39],"restoration":[41,83,117,149],"from":[42],"high-level":[43,111],"semantic":[44,49,171],"perception":[45],"often":[46],"leads":[47],"inconsistency,":[50],"where":[51],"the":[52,116,120],"restored":[53,121],"images":[54],"may":[55],"not":[56],"be":[57],"optimal":[58],"for":[59,195],"can":[62],"even":[63],"introduce":[64],"task-irrelevant":[65],"noise.":[66],"To":[67],"break":[68],"this":[69],"bottleneck,":[71],"we":[72],"propose":[73],"UniV2D,":[74],"Unified":[76],"Vision-to-Detection":[77],"Network":[78],"that":[79,96,179],"jointly":[80],"optimizes":[81],"within":[88],"mutually":[90],"beneficial":[91],"framework.":[92],"Unlike":[93],"traditional":[94],"methods":[95,184],"rely":[97],"on":[98],"disjointed":[99],"or":[101],"rigid":[102],"physical":[103],"priors,":[104],"UniV2D":[105,129,180],"introduces":[106],"semantic-driven":[108],"learning":[109],"paradigm:":[110],"saliency":[112,126,144],"semantics":[113],"actively":[114],"guide":[115],"process,":[118],"while":[119],"cues":[123],"reciprocally":[124],"enhance":[125],"perception.":[127,198],"Specifically,":[128],"features":[130],"hierarchical":[132],"dual-branch":[133],"architecture.":[134],"It":[135],"first":[136],"employs":[137],"self-calibrated":[139],"decoder":[140],"predict":[142],"initial":[143],"masks":[145],"alongside":[146],"mask-aware":[148],"module":[150,159],"reconstruct":[152],"image":[153],"content.":[154],"Subsequently,":[155],"saliency-guided":[157],"refinement":[158],"equipped":[160],"with":[161,170],"cross-level":[162],"modulation":[163],"is":[164],"utilized":[165],"align":[167],"structural":[168],"fidelity":[169],"consistency.":[172],"Extensive":[173],"experiments":[174],"across":[175],"multiple":[176],"benchmarks":[177],"demonstrate":[178],"significantly":[181],"outperforms":[182],"state-of-the-art":[183],"both":[186],"quantitative":[187],"qualitative":[189],"evaluations,":[190],"establishing":[191],"new":[193],"standard":[194],"joint":[196],"underwater":[197]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
