{"id":"https://openalex.org/W7147728849","doi":"https://doi.org/10.48550/arxiv.2603.28980","title":"Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas","display_name":"Stepper: Stepwise Immersive Scene Generation with Multiview Panoramas","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7147728849","doi":"https://doi.org/10.48550/arxiv.2603.28980"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.28980","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28980","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.28980","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070803502","display_name":"Felix Wimbauer","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wimbauer, Felix","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006401089","display_name":"Fabian Manhardt","orcid":"https://orcid.org/0000-0002-4577-4590"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Manhardt, Fabian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072305321","display_name":"Michael Oechsle","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oechsle, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060410510","display_name":"Nikolai Kalischek","orcid":"https://orcid.org/0000-0002-4137-0142"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kalischek, Nikolai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132709453","display_name":"Christian Rupprecht","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rupprecht, Christian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132703825","display_name":"Daniel Cremers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cremers, Daniel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132726697","display_name":"Federico Tombari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tombari, Federico","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5070803502"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4945000112056732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.4945000112056732,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.1703999936580658,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.0869000032544136,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/panorama","display_name":"Panorama","score":0.7168999910354614},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.542900025844574},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4796999990940094},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4465999901294708},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.4221000075340271},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.3582000136375427},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.35100001096725464}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7825999855995178},{"id":"https://openalex.org/C2780580889","wikidata":"https://www.wikidata.org/wiki/Q41363","display_name":"Panorama","level":2,"score":0.7168999910354614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7142999768257141},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.7055000066757202},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.542900025844574},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4862000048160553},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4796999990940094},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4465999901294708},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.4221000075340271},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.3582000136375427},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.35100001096725464},{"id":"https://openalex.org/C2776863239","wikidata":"https://www.wikidata.org/wiki/Q7936601","display_name":"Visual hull","level":3,"score":0.3273000121116638},{"id":"https://openalex.org/C1223959","wikidata":"https://www.wikidata.org/wiki/Q1191960","display_name":"Stereo display","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C173552908","wikidata":"https://www.wikidata.org/wiki/Q1366289","display_name":"Graphics pipeline","level":4,"score":0.2921999990940094},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.28980","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28980","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.28980","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28980","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"synthesis":[1,75],"of":[2],"immersive":[3,72,132],"3D":[4,19,73],"scenes":[5],"from":[6,41,52],"text":[7],"is":[8,59],"rapidly":[9],"maturing,":[10],"driven":[11],"by":[12],"novel":[13,88],"video":[14,57],"generative":[15],"models":[16],"and":[17,26,47,120],"feed-forward":[18],"reconstruction,":[20],"with":[21,99],"vast":[22],"potential":[23],"in":[24],"AR/VR":[25],"world":[27],"modeling.":[28],"While":[29],"panoramic":[30,56,82],"images":[31],"have":[32],"proven":[33],"effective":[34],"for":[35,70,131],"scene":[36,74,83,133],"initialization,":[37],"existing":[38],"approaches":[39],"suffer":[40],"a":[42,67,87,100,110,128],"trade-off":[43],"between":[44],"visual":[45],"fidelity":[46,119],"explorability:":[48],"autoregressive":[49],"expansion":[50],"suffers":[51],"context":[53],"drift,":[54],"while":[55],"generation":[58],"limited":[60],"to":[61],"low":[62],"resolution.":[63],"We":[64],"present":[65],"Stepper,":[66],"unified":[68],"framework":[69],"text-driven":[71],"that":[76,93,104],"circumvents":[77],"these":[78],"limitations":[79],"via":[80],"stepwise":[81],"expansion.":[84],"Stepper":[85,116],"leverages":[86],"multi-view":[89,113],"360\u00b0":[90],"diffusion":[91],"model":[92],"enables":[94],"consistent,":[95],"high-resolution":[96],"expansion,":[97],"coupled":[98],"geometry":[101],"reconstruction":[102],"pipeline":[103],"enforces":[105],"geometric":[106],"coherence.":[107],"Trained":[108],"on":[109],"new":[111,129],"large-scale,":[112],"panorama":[114],"dataset,":[115],"achieves":[117],"state-of-the-art":[118],"structural":[121],"consistency,":[122],"outperforming":[123],"prior":[124],"approaches,":[125],"thereby":[126],"setting":[127],"standard":[130],"generation.":[134]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
