{"id":"https://openalex.org/W7154586752","doi":"https://doi.org/10.48550/arxiv.2604.13688","title":"Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data","display_name":"Beyond Voxel 3D Editing: Learning from 3D Masks and Self-Constructed Data","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154586752","doi":"https://doi.org/10.48550/arxiv.2604.13688"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.13688","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13688","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.13688","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003176478","display_name":"Yizhao Xu","orcid":"https://orcid.org/0009-0007-8109-9522"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yizhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133807351","display_name":"Hongyuan Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Hongyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133798899","display_name":"Caiyun Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Caiyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133785026","display_name":"Tianfu Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Tianfu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133772824","display_name":"Keyu Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Keyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077900121","display_name":"Sicheng Xu","orcid":"https://orcid.org/0000-0002-7903-3934"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Sicheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133803152","display_name":"Jiaolong Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jiaolong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133733740","display_name":"Nicholas Jing Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Nicholas Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133800078","display_name":"Qi Zhang","orcid":"https://orcid.org/0009-0001-9927-7936"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.8070999979972839,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.8070999979972839,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.044199999421834946,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10789","display_name":"Interactive and Immersive Displays","score":0.013299999758601189,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/voxel","display_name":"Voxel","score":0.6010000109672546},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.5870000123977661},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.551800012588501},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.525600016117096},{"id":"https://openalex.org/keywords/image-editing","display_name":"Image editing","score":0.43810001015663147},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.36559998989105225},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3237000107765198}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.828000009059906},{"id":"https://openalex.org/C54170458","wikidata":"https://www.wikidata.org/wiki/Q663554","display_name":"Voxel","level":2,"score":0.6010000109672546},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.5870000123977661},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.551800012588501},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5278000235557556},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.525600016117096},{"id":"https://openalex.org/C2776674983","wikidata":"https://www.wikidata.org/wiki/Q545981","display_name":"Image editing","level":3,"score":0.43810001015663147},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.36559998989105225},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34279999136924744},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3237000107765198},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.32280001044273376},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3174000084400177},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2759000062942505},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2556000053882599},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.13688","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13688","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.13688","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.13688","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"3D":[0,13,16,101,113,149,176],"editing":[1,17,50,61,83],"refers":[2],"to":[3,6,12,27,57,152],"the":[4,41,66,73,78,138,157,181,185],"ability":[5],"apply":[7],"local":[8,32,154],"or":[9],"global":[10],"modifications":[11],"assets.":[14],"Effective":[15],"requires":[18],"maintaining":[19,156],"semantic":[20],"consistency":[21],"by":[22],"performing":[23],"localized":[24],"changes":[25],"according":[26],"prompts,":[28],"while":[29,59,178],"also":[30],"preserving":[31],"invariance":[33],"so":[34],"that":[35,68,167],"unchanged":[36,160],"regions":[37,67,161],"remain":[38],"consistent":[39],"with":[40,105,127],"original.":[42],"However,":[43],"existing":[44],"approaches":[45],"have":[46],"significant":[47],"limitations:":[48],"multi-view":[49],"methods":[51],"incur":[52],"losses":[53],"when":[54],"projecting":[55],"back":[56],"3D,":[58],"voxel-based":[60],"is":[62],"constrained":[63],"in":[64,172],"both":[65],"can":[69],"be":[70],"modified":[71],"and":[72,87],"scale":[74],"of":[75,80,134,159,184],"modifications.":[76],"Moreover,":[77],"lack":[79],"sufficiently":[81],"large":[82],"datasets":[84],"for":[85,112,140],"training":[86],"evaluation":[88],"remains":[89],"a":[90,98,106,122],"challenge.":[91],"To":[92],"address":[93],"these":[94],"challenges,":[95],"we":[96,145],"propose":[97],"Beyond":[99],"Voxel":[100],"Editing":[102],"(BVE)":[103],"framework":[104],"self-constructed":[107],"large-scale":[108],"dataset":[109],"specifically":[110],"tailored":[111],"editing.":[114,163],"Building":[115],"upon":[116],"this":[117],"dataset,":[118],"our":[119],"model":[120],"enhances":[121],"foundational":[123],"image-to-3D":[124],"generative":[125],"architecture":[126],"lightweight,":[128],"trainable":[129],"modules,":[130],"enabling":[131],"efficient":[132],"injection":[133],"textual":[135],"semantics":[136],"without":[137],"need":[139],"expensive":[141],"full-model":[142],"retraining.":[143],"Furthermore,":[144],"introduce":[146],"an":[147],"annotation-free":[148],"masking":[150],"strategy":[151],"preserve":[153],"invariance,":[155],"integrity":[158],"during":[162],"Extensive":[164],"experiments":[165],"demonstrate":[166],"BVE":[168],"achieves":[169],"superior":[170],"performance":[171],"generating":[173],"high-quality,":[174],"text-aligned":[175],"assets,":[177],"faithfully":[179],"retaining":[180],"visual":[182],"characteristics":[183],"original":[186],"input.":[187]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-17T00:00:00"}
