{"id":"https://openalex.org/W7151882125","doi":"https://doi.org/10.48550/arxiv.2604.06113","title":"SEM-ROVER: Semantic Voxel-Guided Diffusion for Large-Scale Driving Scene Generation","display_name":"SEM-ROVER: Semantic Voxel-Guided Diffusion for Large-Scale Driving Scene Generation","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7151882125","doi":"https://doi.org/10.48550/arxiv.2604.06113"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06113","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030321478","display_name":"Hiba Dahmani","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dahmani, Hiba","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074123466","display_name":"Nathan Piasco","orcid":"https://orcid.org/0000-0001-7952-6643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Piasco, Nathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003184493","display_name":"Mouss\u00e2b Bennehar","orcid":"https://orcid.org/0000-0002-6566-6132"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bennehar, Moussab","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133232852","display_name":"Luis Rold\u00e3o","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rold\u00e3o, Luis","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085554991","display_name":"Dzmitry Tsishkou","orcid":"https://orcid.org/0009-0002-9798-3316"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsishkou, Dzmitry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014947118","display_name":"Laurent Caraffa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caraffa, Laurent","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133185275","display_name":"Jean-Philippe Tarel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tarel, Jean-Philippe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5060894578","display_name":"Roland Br\u00e9mond","orcid":"https://orcid.org/0000-0003-3150-7624"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Br\u00e9mond, Roland","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5030321478"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5044999718666077,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.5044999718666077,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.23399999737739563,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.07819999754428864,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.7705000042915344},{"id":"https://openalex.org/keywords/voxel","display_name":"Voxel","score":0.6295999884605408},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6039000153541565},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5561000108718872},{"id":"https://openalex.org/keywords/spatial-coherence","display_name":"Spatial coherence","score":0.544700026512146},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.4684999883174896},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.39959999918937683},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.3995000123977661},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.38190001249313354},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.36500000953674316}],"concepts":[{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.7705000042915344},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.755299985408783},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7166000008583069},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7014999985694885},{"id":"https://openalex.org/C54170458","wikidata":"https://www.wikidata.org/wiki/Q663554","display_name":"Voxel","level":2,"score":0.6295999884605408},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6039000153541565},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5561000108718872},{"id":"https://openalex.org/C2985909886","wikidata":"https://www.wikidata.org/wiki/Q193147","display_name":"Spatial coherence","level":3,"score":0.544700026512146},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.5026999711990356},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.4684999883174896},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.39959999918937683},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.3995000123977661},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.38190001249313354},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.36500000953674316},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C44185422","wikidata":"https://www.wikidata.org/wiki/Q6002064","display_name":"Image-based modeling and rendering","level":3,"score":0.335099995136261},{"id":"https://openalex.org/C2778597888","wikidata":"https://www.wikidata.org/wiki/Q172169","display_name":"3D city models","level":3,"score":0.3327000141143799},{"id":"https://openalex.org/C89720835","wikidata":"https://www.wikidata.org/wiki/Q1531701","display_name":"Global illumination","level":3,"score":0.3239000141620636},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.3224000036716461},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.31610000133514404},{"id":"https://openalex.org/C2987571089","wikidata":"https://www.wikidata.org/wiki/Q738160","display_name":"Multi camera","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C1223959","wikidata":"https://www.wikidata.org/wiki/Q1191960","display_name":"Stereo display","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C204060014","wikidata":"https://www.wikidata.org/wiki/Q6002069","display_name":"Image-based lighting","level":4,"score":0.27140000462532043},{"id":"https://openalex.org/C2989087649","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Image synthesis","level":3,"score":0.2687999904155731},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C200585589","wikidata":"https://www.wikidata.org/wiki/Q752176","display_name":"Texture mapping","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06113","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06113","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.8220195770263672,"display_name":"Sustainable cities and communities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scalable":[0],"generation":[1,141],"of":[2,79],"outdoor":[3,156],"driving":[4],"scenes":[5,112],"requires":[6],"3D":[7,32,50,61,101,139],"representations":[8],"that":[9,93,148],"remain":[10],"consistent":[11],"across":[12],"multiple":[13],"viewpoints":[14],"and":[15,38,99,166],"scale":[16,109],"to":[17,31,42,48,104,110,132,175],"large":[18,111],"areas.":[19],"Existing":[20],"solutions":[21],"either":[22],"rely":[23],"on":[24,65,95],"image":[25],"or":[26,45,52],"video":[27],"generative":[28,62],"models":[29],"distilled":[30],"space,":[33],"harming":[34],"the":[35,40,123],"geometric":[36],"coherence":[37],"restricting":[39],"rendering":[41,130],"training":[43],"views,":[44],"are":[46],"limited":[47],"small-scale":[49],"scene":[51,140],"object-centric":[53],"generation.":[54],"In":[55],"this":[56,85],"work,":[57],"we":[58,87,121],"propose":[59],"a":[60,68,76,89,128],"framework":[63],"based":[64],"$\u03a3$-Voxfield":[66,125],"grid,":[67],"discrete":[69],"representation":[70],"where":[71],"each":[72],"occupied":[73],"voxel":[74,97],"stores":[75],"fixed":[77],"number":[78],"colorized":[80],"surface":[81],"samples.":[82],"To":[83],"generate":[84,152],"representation,":[86],"train":[88],"semantic-conditioned":[90],"diffusion":[91],"model":[92],"operates":[94],"local":[96],"neighborhoods":[98],"uses":[100],"positional":[102],"encodings":[103],"capture":[105],"spatial":[106,115],"structure.":[107],"We":[108],"via":[113],"progressive":[114],"outpainting":[116],"over":[117],"overlapping":[118],"regions.":[119],"Finally,":[120],"render":[122],"generated":[124],"grid":[126],"with":[127,162],"deferred":[129],"module":[131],"obtain":[133],"photorealistic":[134,160],"images,":[135],"enabling":[136],"large-scale":[137,154],"multiview-consistent":[138],"without":[142],"per-scene":[143],"optimization.":[144],"Extensive":[145],"experiments":[146],"show":[147],"our":[149],"approach":[150],"can":[151],"diverse":[153],"urban":[155],"scenes,":[157],"renderable":[158],"into":[159],"images":[161],"various":[163],"sensor":[164],"configurations":[165],"camera":[167],"trajectories":[168],"while":[169],"maintaining":[170],"moderate":[171],"computation":[172],"cost":[173],"compared":[174],"existing":[176],"approaches.":[177]},"counts_by_year":[],"updated_date":"2026-04-09T06:13:59.934233","created_date":"2026-04-09T00:00:00"}
