{"id":"https://openalex.org/W4417124917","doi":"https://doi.org/10.1145/3757377.3763991","title":"MODepth: Benchmarking Mobile Multi-frame Monocular Depth Estimation with Optical Image Stabilization","display_name":"MODepth: Benchmarking Mobile Multi-frame Monocular Depth Estimation with Optical Image Stabilization","publication_year":2025,"publication_date":"2025-12-08","ids":{"openalex":"https://openalex.org/W4417124917","doi":"https://doi.org/10.1145/3757377.3763991"},"language":null,"primary_location":{"id":"doi:10.1145/3757377.3763991","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763991","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3757377.3763991","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007681745","display_name":"Yu Lu","orcid":"https://orcid.org/0000-0002-9024-3692"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yu Lu","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100740620","display_name":"Hao Pan","orcid":"https://orcid.org/0000-0002-2531-0107"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Pan","raw_affiliation_strings":["Microsoft Research Asia, Shanghai, China and School of Computer Science, Shanghai Jiao Tong University, Shanghai, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Shanghai, China and School of Computer Science, Shanghai Jiao Tong University, Shanghai, Shanghai, China","institution_ids":["https://openalex.org/I183067930","https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059988948","display_name":"Dian Ding","orcid":"https://orcid.org/0000-0002-2190-0919"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dian Ding","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100966128","display_name":"Jiatong Ding","orcid":"https://orcid.org/0009-0000-7552-9468"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiatong Ding","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050655047","display_name":"Yongjian Fu","orcid":"https://orcid.org/0000-0001-8481-2644"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongjian Fu","raw_affiliation_strings":["Central South University, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Central South University, Changsha, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009913466","display_name":"Yi\u2010Chao Chen","orcid":"https://orcid.org/0000-0003-0782-4953"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi-Chao Chen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015419107","display_name":"Ju Ren","orcid":"https://orcid.org/0000-0003-2782-183X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ju Ren","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101490654","display_name":"Guangtao Xue","orcid":"https://orcid.org/0000-0002-1617-3593"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangtao Xue","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5007681745"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":1.2181,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85657265,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.4375999867916107,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.4375999867916107,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13114","display_name":"Image Processing Techniques and Applications","score":0.27379998564720154,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13579","display_name":"Image and Video Stabilization","score":0.05469999834895134,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.7580999732017517},{"id":"https://openalex.org/keywords/pose","display_name":"Pose","score":0.5170000195503235},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.47620001435279846},{"id":"https://openalex.org/keywords/parallax","display_name":"Parallax","score":0.46459999680519104},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.46380001306533813},{"id":"https://openalex.org/keywords/motion-estimation","display_name":"Motion estimation","score":0.45339998602867126},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4431000053882599},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.388700008392334}],"concepts":[{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.7580999732017517},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7563999891281128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7533000111579895},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6823999881744385},{"id":"https://openalex.org/C52102323","wikidata":"https://www.wikidata.org/wiki/Q1671968","display_name":"Pose","level":2,"score":0.5170000195503235},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.47620001435279846},{"id":"https://openalex.org/C15759828","wikidata":"https://www.wikidata.org/wiki/Q165074","display_name":"Parallax","level":2,"score":0.46459999680519104},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.46380001306533813},{"id":"https://openalex.org/C10161872","wikidata":"https://www.wikidata.org/wiki/Q557891","display_name":"Motion estimation","level":2,"score":0.45339998602867126},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4431000053882599},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.388700008392334},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.3774999976158142},{"id":"https://openalex.org/C158829959","wikidata":"https://www.wikidata.org/wiki/Q1640606","display_name":"Monocular vision","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.33660000562667847},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C141268832","wikidata":"https://www.wikidata.org/wiki/Q2940499","display_name":"Depth map","level":3,"score":0.3052000105381012},{"id":"https://openalex.org/C157202957","wikidata":"https://www.wikidata.org/wiki/Q1659609","display_name":"Image warping","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C99404194","wikidata":"https://www.wikidata.org/wiki/Q163362","display_name":"Vanishing point","level":3,"score":0.2662000060081482},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C2778022956","wikidata":"https://www.wikidata.org/wiki/Q192234","display_name":"Camera lens","level":3,"score":0.25850000977516174}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3757377.3763991","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763991","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3757377.3763991","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3757377.3763991","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the SIGGRAPH Asia 2025 Conference Papers","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3301288343","display_name":null,"funder_award_id":"61936015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4336681735","display_name":null,"funder_award_id":"24ZR1430600","funder_id":"https://openalex.org/F4320309612","funder_display_name":"Natural Science Foundation of Shanghai"}],"funders":[{"id":"https://openalex.org/F4320309612","display_name":"Natural Science Foundation of Shanghai","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1914596733","https://openalex.org/W2108598243","https://openalex.org/W2133665775","https://openalex.org/W2134618025","https://openalex.org/W2194775991","https://openalex.org/W2593414960","https://openalex.org/W2594519801","https://openalex.org/W2725499676","https://openalex.org/W2886322387","https://openalex.org/W2926429807","https://openalex.org/W2962793285","https://openalex.org/W2962807621","https://openalex.org/W2963488291","https://openalex.org/W2963591054","https://openalex.org/W2963911235","https://openalex.org/W2985775862","https://openalex.org/W2990394353","https://openalex.org/W2990946490","https://openalex.org/W3009257710","https://openalex.org/W3009928773","https://openalex.org/W3034364596","https://openalex.org/W3088639796","https://openalex.org/W3106931899","https://openalex.org/W3109908659","https://openalex.org/W3174211490","https://openalex.org/W3202016623","https://openalex.org/W3203439539","https://openalex.org/W3206822855","https://openalex.org/W3214774289","https://openalex.org/W4200495456","https://openalex.org/W4214520160","https://openalex.org/W4226265017","https://openalex.org/W4304142024","https://openalex.org/W4306179608","https://openalex.org/W4313171699","https://openalex.org/W4327519267","https://openalex.org/W4386083069","https://openalex.org/W4388979610","https://openalex.org/W4390872625","https://openalex.org/W4402727115","https://openalex.org/W4402727359","https://openalex.org/W4404034703"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"MODepth,":[3],"a":[4,57,91,121,161],"multi-frame":[5,48,110],"monocular":[6,127],"depth":[7,59,111,122,128],"estimation":[8,60,69,73,112,129],"system":[9],"based":[10],"on":[11],"the":[12,33,65,84],"controlled":[13],"motion":[14],"of":[15,32,86,146],"an":[16,108,144],"optical":[17],"image":[18],"stabilization":[19],"(OIS)":[20],"module.":[21],"By":[22],"actively":[23],"injecting":[24],"acoustic":[25],"signals,":[26],"we":[27,55,89],"induce":[28],"regular":[29],"translational":[30],"movements":[31],"OIS":[34],"lens,":[35],"resulting":[36],"in":[37,98],"controllable":[38],"camera":[39],"pose":[40,45,72],"changes":[41],"and":[42,63,71,103,124,168],"simplifying":[43],"inter-frame":[44],"estimation.":[46],"Leveraging":[47],"images":[49],"captured":[50],"under":[51],"OIS-controlled":[52],"lens":[53],"movements,":[54],"design":[56],"high-precision":[58],"network,":[61],"MODNet,":[62],"introduce":[64],"principal":[66],"point":[67],"offset":[68],"module":[70],"modules":[74],"to":[75,116,139,164],"fully":[76,140],"exploit":[77],"geometric":[78],"information":[79],"across":[80],"frames.":[81],"To":[82],"validate":[83],"effectiveness":[85],"our":[87,105],"approach,":[88],"collect":[90],"new":[92],"dataset":[93,169],"MODdata":[94],"with":[95,157],"1100":[96],"samples":[97],"nearly":[99],"220":[100],"indoor":[101],"scenarios":[102],"benchmark":[104],"model":[106],"as":[107],"OIS-based":[109],"method,":[113],"comparing":[114],"it":[115],"ground":[117],"truth":[118],"obtained":[119],"from":[120],"sensor":[123],"other":[125],"state-of-the-art":[126],"algorithms.":[130],"Our":[131],"method":[132],"achieves":[133],"competitive":[134],"or":[135],"superior":[136],"performance":[137],"compared":[138],"supervised":[141],"baselines,":[142],"reaching":[143],"RMSE":[145],"0.439,":[147],"which":[148],"outperforms":[149],"all":[150],"evaluated":[151],"methods,":[152],"demonstrating":[153],"that":[154],"self-supervised":[155],"fine-tuning":[156],"OIS-induced":[158],"parallax":[159],"is":[160],"viable":[162],"alternative":[163],"ground-truth":[165],"supervision.":[166],"Code":[167],"are":[170],"available":[171],"at:":[172],"https://github.com/liangjindeamo-yuer/MODEPTH":[173]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-08T00:00:00"}
