{"id":"https://openalex.org/W7162295764","doi":"https://doi.org/10.48550/arxiv.2605.23281","title":"DepthAgent: Towards Better Universal Depth Estimation via Sample-wise Expert Selection","display_name":"DepthAgent: Towards Better Universal Depth Estimation via Sample-wise Expert Selection","publication_year":2026,"publication_date":"2026-05-22","ids":{"openalex":"https://openalex.org/W7162295764","doi":"https://doi.org/10.48550/arxiv.2605.23281"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.23281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.23281","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136912957","display_name":"Jie Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113315291","display_name":"Girish Chandar Ganesan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ganesan, Girish Chandar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136990912","display_name":"Xiaoming Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xiaoming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.01640000008046627,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.009399999864399433,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5860999822616577},{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.582099974155426},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5690000057220459},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.5615000128746033},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5554999709129333},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5407000184059143},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4812000095844269}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.703499972820282},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6392999887466431},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5860999822616577},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.582099974155426},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5690000057220459},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.5615000128746033},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5554999709129333},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5407000184059143},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4812000095844269},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4756999909877777},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.46779999136924744},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3903999924659729},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3183000087738037},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.3082999885082245},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.29899999499320984},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27489998936653137}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.23281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.23281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4612930119037628}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Monocular":[0],"metric":[1],"depth":[2,37,60,102,107],"estimation":[3],"has":[4],"achieved":[5],"strong":[6,63,188],"progress":[7],"with":[8,71,187],"large-scale":[9],"training":[10],"and":[11,25,47,74,112,117,127,162,170,183,200,204],"universal-camera":[12],"modeling,":[13],"yet":[14],"robust":[15],"deployment":[16],"across":[17,167],"diverse":[18],"camera":[19,45,72,118],"settings,":[20],"such":[21,138],"as":[22,109],"perspective,":[23,168],"fisheye,":[24,169],"panoramic":[26,171],"images,":[27],"remains":[28],"challenging.":[29],"Existing":[30],"methods":[31],"typically":[32],"rely":[33],"on":[34,81,190],"a":[35,96,147],"single":[36],"estimator,":[38],"overlooking":[39],"that":[40,59,152,174],"different":[41,44,51,184],"models":[42,108],"encode":[43],"assumptions":[46],"perform":[48],"best":[49],"under":[50],"input":[52],"domains.":[53],"In":[54],"this":[55],"paper,":[56],"we":[57,93,145],"show":[58,173],"experts":[61,86,122],"exhibit":[62],"sample-wise":[64],"complementarity:":[65],"model":[66,181,205],"preference":[67],"is":[68],"highly":[69],"correlated":[70],"geometry,":[73],"multi-model":[75],"fusion":[76],"brings":[77],"the":[78,194],"largest":[79],"gains":[80],"difficult":[82],"samples":[83],"where":[84],"individual":[85,178],"are":[87],"unreliable.":[88],"Motivated":[89],"by":[90],"these":[91],"observations,":[92],"propose":[94],"\\textbf{\\ours},":[95],"vision-language":[97],"agent":[98],"for":[99,133],"adaptive":[100],"monocular":[101],"estimation.":[103],"DepthAgent":[104],"treats":[105],"existing":[106],"frozen":[110],"tools":[111],"learns":[113],"to":[114],"analyze":[115],"scene":[116],"cues,":[119],"invoke":[120],"suitable":[121],"through":[123],"multi-turn":[124],"tool":[125,156],"utilization,":[126],"select":[128],"or":[129],"fuse":[130],"their":[131],"predictions":[132],"each":[134],"input.":[135],"To":[136],"optimize":[137],"discrete":[139],"decision-making":[140],"toward":[141],"dense":[142],"geometric":[143],"quality,":[144,161],"design":[146],"multi-reward":[148],"reinforcement":[149],"fine-tuning":[150],"scheme":[151],"jointly":[153],"encourages":[154],"valid":[155],"execution,":[157],"camera/scene":[158],"analysis,":[159],"expert-selection":[160],"inference":[163],"efficiency.":[164],"Extensive":[165],"experiments":[166],"benchmarks":[172],"\\ours":[175],"consistently":[176],"outperforms":[177],"experts,":[179],"fixed":[180],"fusion,":[182],"selection":[185,199],"strategies,":[186],"improvements":[189],"challenging":[191],"samples,":[192],"highlighting":[193],"critical":[195],"role":[196],"of":[197],"expert":[198],"fusion.":[201],"The":[202],"code":[203],"will":[206],"be":[207],"released":[208],"upon":[209],"publication.":[210]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-26T00:00:00"}
