{"id":"https://openalex.org/W7155043732","doi":"https://doi.org/10.48550/arxiv.2604.17969","title":"E3VS-Bench: A Benchmark for Viewpoint-Dependent Active Perception in 3D Gaussian Splatting Scenes","display_name":"E3VS-Bench: A Benchmark for Viewpoint-Dependent Active Perception in 3D Gaussian Splatting Scenes","publication_year":2026,"publication_date":"2026-04-20","ids":{"openalex":"https://openalex.org/W7155043732","doi":"https://doi.org/10.48550/arxiv.2604.17969"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.17969","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.17969","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102058289","display_name":"Koya Sakamoto","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sakamoto, Koya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006630406","display_name":"Taiki Miyanishi","orcid":"https://orcid.org/0000-0001-9105-1601"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miyanishi, Taiki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031443776","display_name":"Daichi Azuma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azuma, Daichi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073732915","display_name":"Shuhei Kurita","orcid":"https://orcid.org/0000-0001-7415-3120"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kurita, Shuhei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056146974","display_name":"Shu Morikuni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Morikuni, Shu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027888497","display_name":"Naoya Chiba","orcid":"https://orcid.org/0000-0003-3332-4426"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chiba, Naoya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004328317","display_name":"Motoaki Kawanabe","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kawanabe, Motoaki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134140887","display_name":"Yusuke Iwasawa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iwasawa, Yusuke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134102776","display_name":"Yutaka Matsuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matsuo, Yutaka","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5102058289"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7185999751091003,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7185999751091003,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.09189999848604202,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.04650000110268593,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.6686000227928162},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6362000107765198},{"id":"https://openalex.org/keywords/visual-hull","display_name":"Visual hull","score":0.5464000105857849},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.5281999707221985},{"id":"https://openalex.org/keywords/active-vision","display_name":"Active vision","score":0.5180000066757202},{"id":"https://openalex.org/keywords/viewpoints","display_name":"Viewpoints","score":0.5070000290870667},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4643000066280365},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.45879998803138733},{"id":"https://openalex.org/keywords/visual-search","display_name":"Visual search","score":0.4471000134944916}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8237000107765198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6823999881744385},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.6686000227928162},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6362000107765198},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6341000199317932},{"id":"https://openalex.org/C2776863239","wikidata":"https://www.wikidata.org/wiki/Q7936601","display_name":"Visual hull","level":3,"score":0.5464000105857849},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.5281999707221985},{"id":"https://openalex.org/C193611912","wikidata":"https://www.wikidata.org/wiki/Q4677596","display_name":"Active vision","level":2,"score":0.5180000066757202},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.5070000290870667},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4643000066280365},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.45879998803138733},{"id":"https://openalex.org/C158495155","wikidata":"https://www.wikidata.org/wiki/Q2369151","display_name":"Visual search","level":2,"score":0.4471000134944916},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3889000117778778},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.382099986076355},{"id":"https://openalex.org/C2776010242","wikidata":"https://www.wikidata.org/wiki/Q4677575","display_name":"Active perception","level":3,"score":0.36649999022483826},{"id":"https://openalex.org/C83248878","wikidata":"https://www.wikidata.org/wiki/Q344000","display_name":"Active appearance model","level":3,"score":0.362199991941452},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3587999939918518},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.3499000072479248},{"id":"https://openalex.org/C123403432","wikidata":"https://www.wikidata.org/wiki/Q654068","display_name":"Visibility","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.3197999894618988},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.29089999198913574},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.27869999408721924},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.2705000042915344},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2689000070095062},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.26510000228881836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.17969","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.17969","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17969","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.42035147547721863}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Visual":[0],"search":[1,20,93],"in":[2,52,100,145,169,197],"3D":[3,54,91,114,118,125],"environments":[4],"requires":[5],"embodied":[6,22,90],"agents":[7,95],"to":[8,102],"actively":[9],"explore":[10],"their":[11,98,178],"surroundings":[12],"and":[13,21,36,69,121,140,162,176,200],"acquire":[14],"task-relevant":[15],"evidence.":[16],"However,":[17],"existing":[18],"visual":[19,92,135],"AI":[23],"benchmarks,":[24],"including":[25],"EQA,":[26],"typically":[27],"rely":[28],"on":[29],"static":[30],"observations":[31],"or":[32],"constrained":[33],"egocentric":[34],"motion,":[35],"thus":[37],"do":[38],"not":[39],"explicitly":[40],"evaluate":[41,172],"fine-grained":[42,134],"viewpoint-dependent":[43,104],"phenomena":[44],"that":[45,73,132,154],"arise":[46],"under":[47,205],"unrestricted":[48],"5-DoF":[49,101,207],"viewpoint":[50,63,202,208],"control":[51,97],"real-world":[53],"environments,":[55],"such":[56],"as":[57],"visibility":[58],"changes":[59],"caused":[60],"by":[61],"vertical":[62],"shifts,":[64],"revealing":[65],"contents":[66],"inside":[67],"containers,":[68],"disambiguating":[70],"object":[71],"attributes":[72],"are":[74],"only":[75],"observable":[76],"from":[77,158,193],"specific":[78],"angles.":[79],"To":[80],"address":[81],"this":[82],"limitation,":[83],"we":[84],"introduce":[85],"{E3VS-Bench},":[86],"a":[87,159,190],"benchmark":[88],"for":[89,106],"where":[94],"must":[96],"viewpoints":[99,168],"gather":[103],"evidence":[105],"question":[107],"answering.":[108],"E3VS-Bench":[109],"consists":[110],"of":[111,152],"99":[112],"high-fidelity":[113],"scenes":[115],"reconstructed":[116],"using":[117],"Gaussian":[119,126],"Splatting":[120,127],"2,014":[122],"question-driven":[123],"episodes.":[124],"enables":[128],"photorealistic":[129],"free-viewpoint":[130],"rendering":[131],"preserves":[133],"details":[136],"(e.g.,":[137],"small":[138],"text":[139],"subtle":[141],"attributes)":[142],"often":[143],"degraded":[144],"mesh-based":[146],"simulators,":[147],"thereby":[148],"allowing":[149],"the":[150],"construction":[151],"questions":[153],"cannot":[155],"be":[156],"answered":[157],"single":[160],"view":[161],"instead":[163],"require":[164],"active":[165,198],"inspection":[166],"across":[167],"5-DoF.":[170],"We":[171],"multiple":[173],"state-of-the-art":[174],"VLMs":[175],"compare":[177],"performance":[179],"with":[180],"humans.":[181],"Despite":[182],"strong":[183],"2D":[184],"reasoning":[185],"ability,":[186],"all":[187],"models":[188],"exhibit":[189],"substantial":[191],"gap":[192],"humans,":[194],"highlighting":[195],"limitations":[196],"perception":[199],"coherent":[201],"planning":[203],"specifically":[204],"full":[206],"changes.":[209]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-22T00:00:00"}
