{"id":"https://openalex.org/W7125973916","doi":"https://doi.org/10.48550/arxiv.2601.19785","title":"GeoDiff3D: Self-Supervised 3D Scene Generation with Geometry-Constrained 2D Diffusion Guidance","display_name":"GeoDiff3D: Self-Supervised 3D Scene Generation with Geometry-Constrained 2D Diffusion Guidance","publication_year":2026,"publication_date":"2026-01-27","ids":{"openalex":"https://openalex.org/W7125973916","doi":"https://doi.org/10.48550/arxiv.2601.19785"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.19785","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124059796","display_name":"Haozhi Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhu, Haozhi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124102009","display_name":"Miaomiao Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Miaomiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124081228","display_name":"Dingyao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Dingyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102705406","display_name":"Runze Tian","orcid":"https://orcid.org/0000-0001-8923-0328"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Runze","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124075032","display_name":"Yan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124081647","display_name":"Jie Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111161796","display_name":"Fenggen Yu","orcid":"https://orcid.org/0000-0003-1591-4668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Fenggen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5124059796"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.5281000137329102,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.5281000137329102,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.26179999113082886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.07840000092983246,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.5429999828338623},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.521399974822998},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.43560001254081726},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.39250001311302185},{"id":"https://openalex.org/keywords/3d-reconstruction","display_name":"3D reconstruction","score":0.38940000534057617},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.35100001096725464},{"id":"https://openalex.org/keywords/solid-modeling","display_name":"Solid modeling","score":0.34529998898506165},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.3452000021934509},{"id":"https://openalex.org/keywords/3d-modeling","display_name":"3D modeling","score":0.3450999855995178}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6980000138282776},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.5429999828338623},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5412999987602234},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.521399974822998},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.499099999666214},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.43560001254081726},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.39250001311302185},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.38940000534057617},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.35100001096725464},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.3452000021934509},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34150001406669617},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.3328000009059906},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C2985909886","wikidata":"https://www.wikidata.org/wiki/Q193147","display_name":"Spatial coherence","level":3,"score":0.31380000710487366},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.3028999865055084},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C2778597888","wikidata":"https://www.wikidata.org/wiki/Q172169","display_name":"3D city models","level":3,"score":0.2939999997615814},{"id":"https://openalex.org/C1223959","wikidata":"https://www.wikidata.org/wiki/Q1191960","display_name":"Stereo display","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C84824328","wikidata":"https://www.wikidata.org/wiki/Q4633097","display_name":"2D to 3D conversion","level":3,"score":0.2815000116825104},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C50494287","wikidata":"https://www.wikidata.org/wiki/Q658467","display_name":"Texture synthesis","level":5,"score":0.274399995803833},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.27379998564720154},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.2554999887943268}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.19785","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.19785","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.19785","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.19785","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"3D":[0,42,124,155,180],"scene":[1,132,156,181],"generation":[2,43,167],"is":[3],"a":[4,85,89,173],"core":[5],"technology":[6],"for":[7,14,176],"gaming,":[8],"film/VFX,":[9],"and":[10,19,40,53,66,88,111,127,134,151,166,178],"VR/AR.":[11],"Growing":[12],"demand":[13],"rapid":[15],"iteration,":[16],"high-fidelity":[17],"detail,":[18],"accessible":[20,177],"content":[21],"creation":[22],"has":[23],"further":[24,121],"increased":[25],"interest":[26],"in":[27,70],"this":[28],"area.":[29],"Existing":[30],"methods":[31],"broadly":[32],"follow":[33],"two":[34],"paradigms":[35],"-":[36,44],"indirect":[37],"2D-to-3D":[38],"reconstruction":[39],"direct":[41],"but":[45],"both":[46],"are":[47],"limited":[48],"by":[49],"weak":[50],"structural":[51,62,86],"modeling":[52],"heavy":[54],"reliance":[55],"on":[56,141,160],"large-scale":[57],"ground-truth":[58],"supervision,":[59],"often":[60],"producing":[61],"artifacts,":[63],"geometric":[64],"inconsistencies,":[65],"degraded":[67],"high-frequency":[68],"details":[69,136],"complex":[71],"scenes.":[72],"We":[73,120],"propose":[74],"GeoDiff3D,":[75],"an":[76],"efficient":[77,179],"self-supervised":[78],"framework":[79],"that":[80],"uses":[81],"coarse":[82],"geometry":[83],"as":[84],"anchor":[87],"geometry-constrained":[90],"2D":[91],"diffusion":[92],"model":[93],"to":[94,114,130],"provide":[95],"texture-rich":[96],"reference":[97],"images.":[98],"Importantly,":[99],"GeoDiff3D":[100,144],"does":[101],"not":[102],"require":[103],"strict":[104],"multi-view":[105],"consistency":[106],"of":[107],"the":[108,115],"diffusion-generated":[109],"references":[110],"remains":[112],"robust":[113],"resulting":[116],"noisy,":[117],"inconsistent":[118],"guidance.":[119],"introduce":[122],"voxel-aligned":[123],"feature":[125],"aggregation":[126],"dual":[128],"self-supervision":[129],"maintain":[131],"coherence":[133],"fine":[135],"while":[137],"substantially":[138],"reducing":[139],"dependence":[140],"labeled":[142],"data.":[143],"also":[145],"trains":[146],"with":[147],"low":[148],"computational":[149],"cost":[150],"enables":[152],"fast,":[153],"high-quality":[154],"generation.":[157],"Extensive":[158],"experiments":[159],"challenging":[161],"scenes":[162],"show":[163],"improved":[164],"generalization":[165],"quality":[168],"over":[169],"existing":[170],"baselines,":[171],"offering":[172],"practical":[174],"solution":[175],"construction.":[182]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-01-29T00:00:00"}
