{"id":"https://openalex.org/W7159960520","doi":"https://doi.org/10.1109/iccv51701.2025.00727","title":"GSV3D: Gaussian Splatting-Based Geometric Distillation With Stable Video Diffusion for Single-Image 3D Object Generation","display_name":"GSV3D: Gaussian Splatting-Based Geometric Distillation With Stable Video Diffusion for Single-Image 3D Object Generation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W7159960520","doi":"https://doi.org/10.1109/iccv51701.2025.00727"},"language":null,"primary_location":{"id":"doi:10.1109/iccv51701.2025.00727","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00727","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2503.06136","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135081297","display_name":"Ye Tao","orcid":null},"institutions":[{"id":"https://openalex.org/I4392738276","display_name":"State Key Laboratory of Virtual Reality Technology and Systems","ror":"https://ror.org/0009eea46","country_code":null,"type":"facility","lineage":["https://openalex.org/I4392738276","https://openalex.org/I82880672"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Tao","raw_affiliation_strings":["Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I4392738276"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135073736","display_name":"Jiawei Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiawei Zhang","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010582949","display_name":"Yahao Shi","orcid":"https://orcid.org/0009-0009-2296-0241"},"institutions":[{"id":"https://openalex.org/I4392738276","display_name":"State Key Laboratory of Virtual Reality Technology and Systems","ror":"https://ror.org/0009eea46","country_code":null,"type":"facility","lineage":["https://openalex.org/I4392738276","https://openalex.org/I82880672"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yahao Shi","raw_affiliation_strings":["Beihang University,State Key Laboratory of Virtual Reality Technology and Systems"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beihang University,State Key Laboratory of Virtual Reality Technology and Systems","institution_ids":["https://openalex.org/I82880672","https://openalex.org/I4392738276"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039109630","display_name":"Dongqing Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dongqing Zou","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135063323","display_name":"Bin Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bin Zhou","raw_affiliation_strings":["SenseTime Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SenseTime Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68419231,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7751","last_page":"7760"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11408","display_name":"Advanced Optical Imaging Technologies","score":0.28209999203681946,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11408","display_name":"Advanced Optical Imaging Technologies","score":0.28209999203681946,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.14489999413490295,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.06419999897480011,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5192000269889832},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.454800009727478},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.3797999918460846},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.32749998569488525},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.305400013923645},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.30399999022483826}],"concepts":[{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5246000289916992},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5192000269889832},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4964999854564667},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4702000021934509},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.454800009727478},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.38679999113082886},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3797999918460846},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35370001196861267},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.32749998569488525},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.305400013923645},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C7305733","wikidata":"https://www.wikidata.org/wiki/Q207961","display_name":"Geometric shape","level":2,"score":0.2930999994277954},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.27559998631477356},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2531000077724457}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iccv51701.2025.00727","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccv51701.2025.00727","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/CVF International Conference on Computer Vision (ICCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2503.06136","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2503.06136","pdf_url":"https://arxiv.org/pdf/2503.06136","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.06136","is_oa":true,"landing_page_url":"https://arxiv.org/abs/2503.06136","pdf_url":"https://arxiv.org/pdf/2503.06136","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2791438580","display_name":null,"funder_award_id":"Z221100006322003","funder_id":"https://openalex.org/F4320335843","funder_display_name":"Beijing Science and Technology Planning Project"},{"id":"https://openalex.org/G5158269727","display_name":null,"funder_award_id":"61932003","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5186992608","display_name":null,"funder_award_id":"2022YFF0904303","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335843","display_name":"Beijing Science and Technology Planning Project","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Image-based":[0],"3D":[1,16,25,58,63,76,86,135,143,154],"generation":[2,144],"has":[3],"vast":[4],"applications":[5],"in":[6],"robotics":[7],"and":[8,14,33,105,133,145,153,163],"gaming,":[9],"where":[10],"high-quality,":[11,130],"diverse":[12,167],"outputs":[13,82],"consistent":[15],"representations":[17,96],"are":[18,28],"crucial.":[19],"However,":[20],"existing":[21],"methods":[22],"have":[23],"limitations:":[24],"diffusion":[26,55],"models":[27],"limited":[29],"by":[30,78],"dataset":[31],"scarcity":[32],"the":[34,70,147],"absence":[35],"of":[36],"strong":[37,164],"pre-trained":[38],"priors,":[39],"while":[40,61],"2D":[41,54,95,150],"diffusion-based":[42],"approaches":[43],"struggle":[44],"with":[45],"geometric":[46,67,112,121],"consistency.":[47,122],"We":[48],"propose":[49],"a":[50,124,138],"method":[51],"that":[52],"leverages":[53],"models'":[56],"implicit":[57,94],"reasoning":[59],"ability":[60],"ensuring":[62,119],"consistency":[64,77,110,162],"via":[65],"Gaussian-splatting-based":[66],"distillation.":[68],"Specifically,":[69],"proposed":[71],"Gaussian":[72,100],"Splatting":[73,101],"Decoder":[74],"enforces":[75],"transforming":[79],"SV3D":[80],"latent":[81],"into":[83],"an":[84],"explicit":[85],"representation.":[87],"Unlike":[88],"SV3D,":[89],"which":[90],"only":[91],"relies":[92],"on":[93],"for":[97,141],"video":[98],"generation,":[99],"explicitly":[102],"encodes":[103],"spatial":[104],"appearance":[106],"attributes,":[107],"enabling":[108],"multi-view":[109,161],"through":[111],"constraints.":[113],"These":[114],"constraints":[115],"correct":[116],"view":[117],"inconsistencies,":[118],"robust":[120],"As":[123],"result,":[125],"our":[126],"approach":[127],"simultaneously":[128],"generates":[129],"multi-view-consistent":[131],"images":[132],"accurate":[134],"models,":[136],"providing":[137],"scalable":[139],"solution":[140],"single-image-based":[142],"bridging":[146],"gap":[148],"between":[149],"Diffusion":[151],"diversity":[152],"structural":[155],"coherence.":[156],"Experimental":[157],"results":[158],"demonstrate":[159],"state-of-the-art":[160],"generalization":[165],"across":[166],"datasets.":[168],"The":[169],"code":[170],"will":[171],"be":[172],"made":[173],"publicly":[174],"available":[175],"upon":[176],"acceptance.":[177]},"counts_by_year":[],"updated_date":"2026-07-03T08:13:44.112507","created_date":"2026-05-04T00:00:00"}
