{"id":"https://openalex.org/W4414688435","doi":"https://doi.org/10.1145/3770076","title":"Multi-space Representation Fusion Enhanced Monocular Depth Estimation via Virtual Point Cloud","display_name":"Multi-space Representation Fusion Enhanced Monocular Depth Estimation via Virtual Point Cloud","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414688435","doi":"https://doi.org/10.1145/3770076"},"language":"en","primary_location":{"id":"doi:10.1145/3770076","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3770076","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047956564","display_name":"Lin Bie","orcid":"https://orcid.org/0000-0002-4844-1353"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lin Bie","raw_affiliation_strings":["BNRist, THUIBCS, BLBCI, School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4844-1353","affiliations":[{"raw_affiliation_string":"BNRist, THUIBCS, BLBCI, School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100336543","display_name":"Siqi Li","orcid":"https://orcid.org/0000-0001-9720-826X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siqi Li","raw_affiliation_strings":["BNRist, THUIBCS, BLBCI, School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-9720-826X","affiliations":[{"raw_affiliation_string":"BNRist, THUIBCS, BLBCI, School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088944559","display_name":"Xiaopin Zhong","orcid":"https://orcid.org/0000-0002-8761-9396"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaopin Zhong","raw_affiliation_strings":["College of Mechatronics and Control Engineering, Shenzhen University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-8761-9396","affiliations":[{"raw_affiliation_string":"College of Mechatronics and Control Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004392249","display_name":"Zongze Wu","orcid":"https://orcid.org/0000-0002-0597-1426"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongze Wu","raw_affiliation_strings":["College of Mechatronics and Control Engineering, Shenzhen University, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-0597-1426","affiliations":[{"raw_affiliation_string":"College of Mechatronics and Control Engineering, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056503588","display_name":"Yue Gao","orcid":"https://orcid.org/0000-0002-9705-3365"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Gao","raw_affiliation_strings":["BNRist, THUIBCS, BLBCI, School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-9705-3365","affiliations":[{"raw_affiliation_string":"BNRist, THUIBCS, BLBCI, School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5047956564"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25094311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"21","issue":"12","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11164","display_name":"Remote Sensing and LiDAR Applications","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/2305","display_name":"Environmental Engineering"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11211","display_name":"3D Surveying and Cultural Heritage","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1907","display_name":"Geology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.7196999788284302},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.5077999830245972},{"id":"https://openalex.org/keywords/depth-map","display_name":"Depth map","score":0.5034999847412109},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.47929999232292175},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4699000120162964},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.37630000710487366},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3758000135421753},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.3749000132083893},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3732999861240387},{"id":"https://openalex.org/keywords/geometric-primitive","display_name":"Geometric primitive","score":0.3580000102519989}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8321999907493591},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.7196999788284302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6394000053405762},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.5077999830245972},{"id":"https://openalex.org/C141268832","wikidata":"https://www.wikidata.org/wiki/Q2940499","display_name":"Depth map","level":3,"score":0.5034999847412109},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4925000071525574},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.47929999232292175},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4699000120162964},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.3749000132083893},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3732999861240387},{"id":"https://openalex.org/C181095308","wikidata":"https://www.wikidata.org/wiki/Q1541599","display_name":"Geometric primitive","level":2,"score":0.3580000102519989},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.35040000081062317},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3176000118255615},{"id":"https://openalex.org/C132459708","wikidata":"https://www.wikidata.org/wiki/Q744069","display_name":"Extrapolation","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.3050999939441681},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C51970089","wikidata":"https://www.wikidata.org/wiki/Q44415","display_name":"Virtual image","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C23903533","wikidata":"https://www.wikidata.org/wiki/Q17122739","display_name":"Reprojection error","level":3,"score":0.2831000089645386},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.28139999508857727},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.27869999408721924},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.27559998631477356},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.26930001378059387},{"id":"https://openalex.org/C104065381","wikidata":"https://www.wikidata.org/wiki/Q1002535","display_name":"Geometric modeling","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3770076","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3770076","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2951234442","https://openalex.org/W2955639361","https://openalex.org/W3081167590","https://openalex.org/W4242817480","https://openalex.org/W4310078553","https://openalex.org/W4312472981","https://openalex.org/W4313040116","https://openalex.org/W4320167896","https://openalex.org/W4385413672","https://openalex.org/W4386737189","https://openalex.org/W4390195732","https://openalex.org/W4399486969","https://openalex.org/W4399631183","https://openalex.org/W4399938885","https://openalex.org/W4401635102","https://openalex.org/W4402043846","https://openalex.org/W4402354740"],"related_works":[],"abstract_inverted_index":{"Monocular":[0],"Depth":[1],"Estimation":[2],"(MDE)":[3],"is":[4],"a":[5,38,67,90,117,144],"fundamental":[6],"problem":[7],"in":[8,14,126,133,172],"computer":[9],"vision":[10],"with":[11],"broad":[12],"applications":[13],"various":[15],"downstream":[16],"tasks.":[17],"While":[18],"recent":[19],"studies":[20],"focus":[21],"on":[22,149,168,215,232],"designing":[23],"increasingly":[24],"complex":[25],"and":[26,129,218,226,235],"powerful":[27],"deep":[28],"learning":[29],"methods":[30],"to":[31,52,74,81,95,155,196],"regress":[32],"depth":[33,103,113,159],"maps":[34],"directly,":[35],"we":[36,65,88,106,142,162,188],"propose":[37,189],"novel":[39],"approach":[40],"by":[41,115],"introducing":[42],"the":[43,54,59,76,83,102,123,130,150,153,157,177,198,216,233,240],"Virtual":[44],"Point":[45],"Cloud":[46],"(VPC)":[47],"as":[48],"an":[49,190],"intermediate":[50],"representation":[51,70,119],"provide":[53],"approximate":[55],"geometric":[56,99,109,131,182,193],"prior":[57],"for":[58,101,111],"MDE":[60,72],"task.":[61],"In":[62],"this":[63],"article,":[64],"design":[66,89],"multi-scale":[68,97],"multi-space":[69,118],"fusion-enhanced":[71],"framework":[73],"address":[75],"challenges":[77],"of":[78,85,152,224,242],"MDE.":[79],"Specifically,":[80,161],"resolve":[82],"issue":[84],"scale":[86],"ambiguity,":[87],"VPC":[91,154],"feature":[92],"extraction":[93],"module":[94],"learn":[96],"3D":[98,134,169],"information":[100],"prior.":[104],"Then,":[105],"explicitly":[107],"introduce":[108,143],"constraints":[110],"global":[112],"prediction":[114],"incorporating":[116],"fusion":[120],"from":[121],"both":[122],"texture":[124],"features":[125,132],"2D":[127],"space":[128],"space.":[135],"To":[136],"mitigate":[137],"errors":[138],"at":[139,184],"object":[140,185],"boundaries,":[141],"confidence":[145,178,192],"map":[146,179],"generated":[147],"based":[148,167],"quality":[151],"refine":[156],"predicted":[158],"map.":[160],"construct":[163],"convolution":[164],"receptive":[165],"fields":[166],"spatial":[170],"distances":[171],"spherical":[173],"coordinates,":[174],"ensuring":[175],"that":[176,204],"provides":[180],"reliable":[181],"guidance":[183],"boundaries.":[186],"Furthermore,":[187],"independent":[191],"consistency":[194],"loss":[195],"supervise":[197],"refinement":[199],"process.":[200],"Experimental":[201],"results":[202],"demonstrate":[203],"our":[205,243],"method":[206],"significantly":[207],"outperforms":[208],"state-of-the-art":[209],"approaches":[210],"across":[211],"all":[212],"evaluation":[213],"metrics":[214],"KITTI":[217],"NYU-Depth-v2":[219],"datasets,":[220],"achieving":[221],"RMSE":[222],"improvements":[223],"9.2%":[225],"2.8%,":[227],"respectively.":[228],"Moreover,":[229],"zero-shot":[230],"evaluations":[231],"nuScenes":[234],"SUN-RGBD":[236],"datasets":[237],"further":[238],"validate":[239],"generalizability":[241],"approach.":[244]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
