{"id":"https://openalex.org/W7135194298","doi":"https://doi.org/10.48550/arxiv.2603.11554","title":"MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks","display_name":"MANSION: Multi-floor lANguage-to-3D Scene generatIOn for loNg-horizon tasks","publication_year":2026,"publication_date":"2026-03-12","ids":{"openalex":"https://openalex.org/W7135194298","doi":"https://doi.org/10.48550/arxiv.2603.11554"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11554","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11554","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11554","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047614319","display_name":"L Che","orcid":"https://orcid.org/0009-0004-9769-5140"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Che, Lirong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111006735","display_name":"Shuo Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Shuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129076404","display_name":"Shan Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Shan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129086469","display_name":"Chuang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chuang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128990350","display_name":"Yuzhe Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yuzhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075441381","display_name":"Gregory Dudek","orcid":"https://orcid.org/0000-0001-5040-4925"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dudek, Gregory","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129056806","display_name":"Xueqian Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xueqian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129043990","display_name":"Jian Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7095000147819519,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7095000147819519,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.06949999928474426,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.030500000342726707,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.7508000135421753},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7268000245094299},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.4262999892234802},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.40639999508857727},{"id":"https://openalex.org/keywords/span","display_name":"Span (engineering)","score":0.3693999946117401},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.33739998936653137}],"concepts":[{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.7508000135421753},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7268000245094299},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.715499997138977},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4830999970436096},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44839999079704285},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.4262999892234802},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.40639999508857727},{"id":"https://openalex.org/C2778753569","wikidata":"https://www.wikidata.org/wiki/Q1960395","display_name":"Span (engineering)","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C115051666","wikidata":"https://www.wikidata.org/wiki/Q6522493","display_name":"Ranging","level":2,"score":0.335999995470047},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.2671000063419342}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11554","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11554","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11554","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11554","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.7287805676460266,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Real-world":[0],"robotic":[1],"tasks":[2],"are":[3,18],"long-horizon":[4,69],"and":[5,65,131],"often":[6],"span":[7],"multiple":[8],"floors,":[9],"demanding":[10],"rich":[11],"spatial":[12,129],"reasoning.":[13],"However,":[14],"existing":[15],"embodied":[16],"benchmarks":[17],"largely":[19],"confined":[20],"to":[21,26,88,103],"single-floor":[22],"in-house":[23],"environments,":[24],"failing":[25],"reflect":[27],"the":[28,36,63,125],"complexity":[29],"of":[30,48,67,80,128],"real-world":[31],"tasks.":[32,70],"We":[33],"introduce":[34],"MANSION,":[35],"first":[37],"language-driven":[38],"framework":[39],"for":[40,124],"generating":[41],"building-scale,":[42],"multi-floor":[43],"3D":[44],"environments.":[45],"Being":[46],"aware":[47],"vertical":[49],"structural":[50],"constraints,":[51],"MANSION":[52,119],"generates":[53],"realistic,":[54],"navigable":[55],"whole-building":[56],"structures":[57],"with":[58],"diverse,":[59],"human-friendly":[60],"scenes,":[61],"enabling":[62],"development":[64],"evaluation":[66],"cross-floor":[68],"Building":[71],"on":[72],"this":[73],"framework,":[74],"we":[75],"release":[76],"MansionWorld,":[77],"a":[78,91,121],"dataset":[79],"over":[81],"1,000":[82],"diverse":[83],"buildings":[84],"ranging":[85],"from":[86],"hospitals":[87],"offices,":[89],"alongside":[90],"Task-Semantic":[92],"Scene":[93],"Editing":[94],"Agent":[95],"that":[96,110],"customizes":[97],"these":[98],"environments":[99],"using":[100],"open-vocabulary":[101],"commands":[102],"meet":[104],"specific":[105],"user":[106],"needs.":[107],"Benchmarking":[108],"reveals":[109],"state-of-the-art":[111],"agents":[112],"degrade":[113],"sharply":[114],"in":[115],"our":[116],"settings,":[117],"establishing":[118],"as":[120],"critical":[122],"testbed":[123],"next":[126],"generation":[127],"reasoning":[130],"planning.":[132]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-03-14T00:00:00"}
