{"id":"https://openalex.org/W7138977699","doi":"https://doi.org/10.48550/arxiv.2603.16340","title":"Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation","display_name":"Iris: Bringing Real-World Priors into Diffusion Model for Monocular Depth Estimation","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7138977699","doi":"https://doi.org/10.48550/arxiv.2603.16340"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16340","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16340","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16340","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125621091","display_name":"Xinhao Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cai, Xinhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082756213","display_name":"Gensheng Pei","orcid":"https://orcid.org/0000-0002-7677-7487"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pei, Gensheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073755558","display_name":"Zeren Sun","orcid":"https://orcid.org/0000-0001-6262-5338"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Zeren","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129849671","display_name":"Yazhou Yao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Yazhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074492050","display_name":"Fumin Shen","orcid":"https://orcid.org/0000-0001-7303-3231"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Fumin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130110940","display_name":"Wenguan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Wenguan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5125621091"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.7242000102996826,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.7242000102996826,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.11379999667406082,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10052","display_name":"Medical Image Segmentation Techniques","score":0.038100000470876694,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/prior-probability","display_name":"Prior probability","score":0.9164000153541565},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6725000143051147},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.5616000294685364},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5343000292778015},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4546000063419342},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.44020000100135803},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3411000072956085},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.3287999927997589}],"concepts":[{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.9164000153541565},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6725000143051147},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6198999881744385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5839999914169312},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.5616000294685364},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5343000292778015},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4546000063419342},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4496000111103058},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.44020000100135803},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35600000619888306},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.35339999198913574},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3303999900817871},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3000999987125397},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2865999937057495},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2551000118255615},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16340","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16340","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16340","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16340","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1,68],"paper,":[2],"we":[3,70],"propose":[4],"\\textbf{Iris},":[5],"a":[6,72,122],"deterministic":[7],"framework":[8],"for":[9],"Monocular":[10],"Depth":[11],"Estimation":[12],"(MDE)":[13],"that":[14,130],"integrates":[15],"real-world":[16],"priors":[17,40,89],"into":[18],"the":[19,78,96],"diffusion":[20],"model.":[21],"Conventional":[22],"feed-forward":[23],"methods":[24,36],"rely":[25],"on":[26],"massive":[27],"training":[28,65],"data,":[29],"yet":[30,41],"still":[31],"miss":[32],"details.":[33],"Previous":[34],"diffusion-based":[35],"leverage":[37],"rich":[38],"generative":[39],"struggle":[42],"with":[43,63,109,121,138],"synthetic-to-real":[44],"domain":[45],"transfer.":[46],"Iris,":[47],"in":[48,135],"contrast,":[49],"preserves":[50],"fine":[51],"details,":[52],"generalizes":[53],"strongly":[54],"from":[55],"synthetic":[56,110],"to":[57,85,103],"real":[58,88],"scenes,":[59],"and":[60,95,118],"remains":[61],"efficient":[62],"limited":[64],"data.":[66],"To":[67],"end,":[69],"introduce":[71],"two-stage":[73],"Priors-to-Geometry":[74],"Deterministic":[75],"(PGD)":[76],"schedule:":[77],"prior":[79],"stage":[80,98],"uses":[81],"Spectral-Gated":[82,100],"Distillation":[83],"(SGD)":[84],"transfer":[86],"low-frequency":[87],"while":[90,107],"leaving":[91],"high-frequency":[92,105],"details":[93],"unconstrained,":[94],"geometry":[97],"applies":[99],"Consistency":[101],"(SGC)":[102],"enforce":[104],"fidelity":[106],"refining":[108],"ground":[111],"truth.":[112],"The":[113],"two":[114],"stages":[115],"share":[116],"weights":[117],"are":[119],"executed":[120],"high-to-low":[123],"timestep":[124],"schedule.":[125],"Extensive":[126],"experimental":[127],"results":[128],"confirm":[129],"Iris":[131],"achieves":[132],"significant":[133],"improvements":[134],"MDE":[136],"performance":[137],"strong":[139],"in-the-wild":[140],"generalization.":[141]},"counts_by_year":[],"updated_date":"2026-03-20T20:54:20.808490","created_date":"2026-03-20T00:00:00"}
