{"id":"https://openalex.org/W7127144618","doi":"https://doi.org/10.1109/tmm.2026.3660136","title":"Toward Smooth Depth Driven by Selective Attention and Selective Aggregation","display_name":"Toward Smooth Depth Driven by Selective Attention and Selective Aggregation","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7127144618","doi":"https://doi.org/10.1109/tmm.2026.3660136"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2026.3660136","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2026.3660136","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014633392","display_name":"Cheol Hoon Park","orcid":"https://orcid.org/0000-0001-7942-7311"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Cheol-Hoon Park","raw_affiliation_strings":["School of Electrical Engineering, Korea University, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0009-0008-8271-2971","affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, Korea University, Seoul, South Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081937055","display_name":"Woo Jin Ahn","orcid":"https://orcid.org/0000-0002-8261-8132"},"institutions":[{"id":"https://openalex.org/I191879574","display_name":"Inha University","ror":"https://ror.org/01easw929","country_code":"KR","type":"education","lineage":["https://openalex.org/I191879574"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Woo-Jin Ahn","raw_affiliation_strings":["Department of Artificial Intelligence, Inha University, Incheon, South Korea"],"raw_orcid":"https://orcid.org/0000-0002-8261-8132","affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Inha University, Incheon, South Korea","institution_ids":["https://openalex.org/I191879574"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015493373","display_name":"Hyun Duck Choi","orcid":"https://orcid.org/0000-0003-2811-4870"},"institutions":[{"id":"https://openalex.org/I118373667","display_name":"Seoul National University of Science and Technology","ror":"https://ror.org/00chfja07","country_code":"KR","type":"education","lineage":["https://openalex.org/I118373667"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyun-Duck Choi","raw_affiliation_strings":["Department of Smart ICT Convergence Engineering, Seoul National University of Science and Technology, Seoul, South Korea"],"raw_orcid":"https://orcid.org/0000-0003-2811-4870","affiliations":[{"raw_affiliation_string":"Department of Smart ICT Convergence Engineering, Seoul National University of Science and Technology, Seoul, South Korea","institution_ids":["https://openalex.org/I118373667"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11570441,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"4585","last_page":"4595"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9199000000953674,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9199000000953674,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.015399999916553497,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.012799999676644802,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6448000073432922},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.614300012588501},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.5835999846458435},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.48100000619888306},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44209998846054077},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.43810001015663147},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.42969998717308044},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.41530001163482666},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4101000130176544}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8575999736785889},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6448000073432922},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.614300012588501},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.5835999846458435},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.48100000619888306},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44859999418258667},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.44440001249313354},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44209998846054077},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.43810001015663147},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.42969998717308044},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.41530001163482666},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4101000130176544},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38119998574256897},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C156273044","wikidata":"https://www.wikidata.org/wiki/Q4913766","display_name":"Bin","level":2,"score":0.3474000096321106},{"id":"https://openalex.org/C194544171","wikidata":"https://www.wikidata.org/wiki/Q21105679","display_name":"Gating","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C53533937","wikidata":"https://www.wikidata.org/wiki/Q185020","display_name":"Histogram","level":3,"score":0.3215000033378601},{"id":"https://openalex.org/C2989146674","wikidata":"https://www.wikidata.org/wiki/Q4818236","display_name":"Selective attention","level":3,"score":0.3179999887943268},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.29010000824928284},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.2809000015258789},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25589999556541443},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2026.3660136","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2026.3660136","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321294","display_name":"Seoul National University of Science and Technology","ror":"https://ror.org/00chfja07"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,196,217],"challenges":[1],"in":[2,64,119,189],"single-image":[3],"depth":[4,15,62,194,205],"prediction":[5],"(SIDP)":[6],"are":[7,50],"mainly":[8],"due":[9],"to":[10,100,148,166,181,214],"the":[11,19,84,113],"lack":[12],"of":[13,21,115],"smooth":[14,61],"ground":[16],"truth":[17],"and":[18,23,66,81,92,104,137,170,192,202],"presence":[20],"irregular":[22],"complex":[24],"objects.":[25],"While":[26],"window-based":[27],"attention":[28,76,121,201],"mechanisms,":[29],"which":[30,132],"balance":[31],"long-range":[32],"dependency":[33],"capture":[34],"with":[35,126,140],"computational":[36],"efficiency":[37],"by":[38,52,95],"processing":[39],"elements":[40],"within":[41],"a":[42,53,74,134,176,190],"fixed":[43],"grid,":[44],"have":[45],"advanced":[46],"SIDP":[47],"research,":[48],"they":[49],"limited":[51],"constrained":[54],"search":[55],"range.":[56],"This":[57],"limitation":[58],"can":[59],"impede":[60],"estimation":[63],"irregularity":[65],"complexity.":[67],"To":[68],"address":[69],"these":[70,183],"challenges,":[71],"we":[72,111,159],"propose":[73],"novel":[75],"mechanism":[77,136],"that":[78],"selectively":[79],"identifies":[80],"aggregates":[82],"only":[83],"most":[85],"relevant":[86],"information.":[87],"Our":[88,129],"approach":[89],"enables":[90],"flexible":[91],"efficient":[93],"exploration":[94],"using":[96],"data-dependent":[97],"movable":[98],"offsets":[99],"select":[101],"substantial":[102],"tokens":[103],"designating":[105],"them":[106,147],"as":[107],"key-value":[108],"pairs.":[109],"Furthermore,":[110],"overcome":[112],"issue":[114],"small":[116],"softmax":[117],"values":[118],"traditional":[120],"mechanisms":[122],"through":[123],"score-based":[124],"grouping":[125],"top-k":[127],"selection.":[128],"feed-forward":[130],"network,":[131,198],"incorporates":[133],"gating":[135],"grouped":[138],"convolutions":[139],"varying":[141],"cardinalities,":[142],"refines":[143],"features":[144],"before":[145],"passing":[146],"subsequent":[149],"layers,":[150],"allowing":[151],"for":[152],"targeted":[153],"focus":[154],"on":[155],"input":[156],"features.":[157],"Finally,":[158],"utilize":[160],"feature":[161],"maps":[162],"from":[163],"hierarchical":[164],"decoders":[165],"estimate":[167],"bin":[168],"centers":[169],"per-pixel":[171],"probability":[172,185],"distributions.":[173],"We":[174],"introduce":[175],"4-way":[177],"selective":[178,200,203],"scanning":[179],"technique":[180],"aggregate":[182,204],"perpixel":[184],"distributions":[186],"smoothly,":[187],"resulting":[188],"dense":[191],"continuous":[193],"map.":[195],"proposed":[197],"named":[199],"(SA2Depth),":[206],"demonstrates":[207],"state-of-the-art":[208],"performance":[209],"across":[210],"multiple":[211],"datasets":[212],"compared":[213],"previous":[215],"methods.":[216],"code":[218],"is":[219],"available":[220],"at":[221],"<uri":[222],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[223],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/towardsDLCV/SA2DEPTH</uri>.":[224]},"counts_by_year":[],"updated_date":"2026-06-23T06:36:01.041984","created_date":"2026-02-03T00:00:00"}
