{"id":"https://openalex.org/W4405714929","doi":"https://doi.org/10.1109/3dv69130.2026.00165","title":"SCENIC: Scene-Aware Semantic Navigation with Instruction-Guided Control","display_name":"SCENIC: Scene-Aware Semantic Navigation with Instruction-Guided Control","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W4405714929","doi":"https://doi.org/10.1109/3dv69130.2026.00165"},"language":"en","primary_location":{"id":"doi:10.1109/3dv69130.2026.00165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2412.15664","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100431071","display_name":"Xiaohan Zhang","orcid":"https://orcid.org/0000-0002-1118-7582"},"institutions":[{"id":"https://openalex.org/I143910747","display_name":"TH Bingen University of Applied Sciences","ror":"https://ror.org/01pxkj057","country_code":"DE","type":"education","lineage":["https://openalex.org/I143910747"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Xiaohan Zhang","raw_affiliation_strings":["T&#x00FC;bingen AI Center, University of T&#x00FC;bingen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"T&#x00FC;bingen AI Center, University of T&#x00FC;bingen","institution_ids":["https://openalex.org/I143910747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102811169","display_name":"Sebastian Starke","orcid":"https://orcid.org/0000-0002-4519-4326"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sebastian Starke","raw_affiliation_strings":["Meta Reality Labs Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta Reality Labs Research","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019960170","display_name":"Vladimir Guzov","orcid":"https://orcid.org/0000-0003-1304-5577"},"institutions":[{"id":"https://openalex.org/I143910747","display_name":"TH Bingen University of Applied Sciences","ror":"https://ror.org/01pxkj057","country_code":"DE","type":"education","lineage":["https://openalex.org/I143910747"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Vladimir Guzov","raw_affiliation_strings":["T&#x00FC;bingen AI Center, University of T&#x00FC;bingen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"T&#x00FC;bingen AI Center, University of T&#x00FC;bingen","institution_ids":["https://openalex.org/I143910747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043901952","display_name":"Zhensong Zhang","orcid":"https://orcid.org/0009-0001-7911-7564"},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Zhensong Zhang","raw_affiliation_strings":["Huawei Noah&#x0027;s Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x0027;s Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012064717","display_name":"Eduardo P\u00e9rez Pellitero","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159102","display_name":"Huawei Technologies (Sweden)","ror":"https://ror.org/0500fyd17","country_code":"SE","type":"company","lineage":["https://openalex.org/I2250955327","https://openalex.org/I4210159102"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Eduardo P\u00e9rez-Pellitero","raw_affiliation_strings":["Huawei Noah&#x0027;s Ark Lab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huawei Noah&#x0027;s Ark Lab","institution_ids":["https://openalex.org/I4210159102"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076908763","display_name":"Gerard Pons\u2010Moll","orcid":"https://orcid.org/0000-0001-5115-7794"},"institutions":[{"id":"https://openalex.org/I143910747","display_name":"TH Bingen University of Applied Sciences","ror":"https://ror.org/01pxkj057","country_code":"DE","type":"education","lineage":["https://openalex.org/I143910747"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gerard Pons-Moll","raw_affiliation_strings":["T&#x00FC;bingen AI Center, University of T&#x00FC;bingen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"T&#x00FC;bingen AI Center, University of T&#x00FC;bingen","institution_ids":["https://openalex.org/I143910747"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00408083,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1738","last_page":"1749"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5927214026451111},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.5853356719017029},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3569497764110565},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3280913829803467}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5927214026451111},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.5853356719017029},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3569497764110565},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3280913829803467}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/3dv69130.2026.00165","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2412.15664","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.15664","pdf_url":"https://arxiv.org/pdf/2412.15664","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2412.15664","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2412.15664","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2412.15664","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.15664","pdf_url":"https://arxiv.org/pdf/2412.15664","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Synthesizing":[0],"natural":[1,68],"human":[2,53,160,234],"motion":[3,18,36,54,162,200,213],"that":[4,55,157,178,215,226,236],"adapts":[5,56],"to":[6,34,51,57,239,248,257],"complex":[7,79,240],"environments":[8],"while":[9,63,82,109],"allowing":[10],"creative":[11],"control":[12,35,66],"remains":[13],"a":[14,47,125,130,142,153,173,198],"fundamental":[15],"challenge":[16,73],"in":[17,75],"synthesis.":[19],"Existing":[20],"models":[21],"often":[22],"fall":[23],"short,":[24],"either":[25],"by":[26,183],"assuming":[27],"flat":[28],"terrain":[29,244],"or":[30,121],"lacking":[31],"the":[32,136,159],"ability":[33],"semantics":[37],"through":[38,67],"text.":[39],"To":[40,191],"address":[41],"these":[42],"limitations,":[43],"we":[44,196,252],"introduce":[45],"SCENIC,":[46],"diffusion":[48,201,206,229],"model":[49,98,230],"designed":[50],"generate":[52],"dynamic":[58],"terrains":[59],"within":[60],"virtual":[61],"scenes":[62,241],"enabling":[64,168,211],"semantic":[65,221],"language.":[69],"The":[70,97],"key":[71,151],"technical":[72],"lies":[74],"simultaneously":[76],"reasoning":[77,133,146],"about":[78],"scene":[80,132,145,166,186,218],"geometry":[81],"maintaining":[83],"text":[84,113,222],"control.":[85],"This":[86],"requires":[87],"understanding":[88],"both":[89,217,237],"high-level":[90,189],"navigation":[91,105],"goals":[92],"and":[93,103,172,194,203,220,246],"fine-grained":[94],"environmental":[95],"constraints.":[96],"must":[99],"ensure":[100,192],"physical":[101],"plausibility":[102,193],"precise":[104,169],"across":[106],"varied":[107],"terrain,":[108],"also":[110],"preserving":[111],"user-specified":[112],"control,":[114],"such":[115],"as":[116],"\u201ccarefully":[117],"stepping":[118],"over":[119],"obstacles\u201d":[120],"\u201cwalking":[122],"upstairs":[123],"like":[124],"zombie.\u201d":[126],"Our":[127],"solution":[128],"introduces":[129],"hierarchical":[131,144],"approach.":[134],"At":[135],"core":[137],"of":[138],"our":[139,227],"method":[140],"is":[141],"novel":[143,228],"framework.":[147],"It":[148],"combines":[149],"two":[150],"components:":[152],"motion-scene":[154],"cross-attention":[155],"block":[156],"aligns":[158],"body's":[161],"features":[163],"with":[164,242],"local":[165],"geometry,":[167],"low-level":[170],"interactions;":[171],"target":[174,185],"point":[175],"canonicalization":[176],"module":[177],"provides":[179],"global":[180],"goal":[181],"conditioning":[182],"normalizing":[184],"coordinates":[187],"for":[188],"guidance.":[190],"naturalness,":[195],"leverage":[197],"pre-trained":[199],"prior":[202],"apply":[204],"scene-constrained":[205],"noise":[207],"optimization":[208],"during":[209],"sampling,":[210],"long-horizon":[212],"generation":[214],"respects":[216],"structure":[219],"input.":[223],"Experiments":[224],"demonstrate":[225],"generates":[231],"arbitrarily":[232],"long":[233],"motions":[235],"adapt":[238],"varying":[243],"surfaces":[245],"respond":[247],"textual":[249],"prompts.":[250],"Additionally,":[251],"show":[253],"SCENIC":[254],"can":[255],"generalize":[256],"four":[258],"real-scene":[259],"datasets.":[260]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
