{"id":"https://openalex.org/W7137982797","doi":"https://doi.org/10.1609/aaai.v40i12.37938","title":"VIR-Bench: Evaluating Geospatial and Temporal Understanding of MLLMs via Travel Video Itinerary Reconstruction","display_name":"VIR-Bench: Evaluating Geospatial and Temporal Understanding of MLLMs via Travel Video Itinerary Reconstruction","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137982797","doi":"https://doi.org/10.1609/aaai.v40i12.37938"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i12.37938","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i12.37938","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i12.37938","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129641638","display_name":"Hao Henry Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hao Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093054887","display_name":"Eiki Murata","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Eiki Murata","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061807804","display_name":"Lingfang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lingfang Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129660355","display_name":"Ayako Sato","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ayako Sato","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125763637","display_name":"So Fukuda","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"So Fukuda","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129715078","display_name":"Ziqi Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ziqi Yin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129729647","display_name":"Wentao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wentao Hu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022881767","display_name":"Keisuke Nakao","orcid":"https://orcid.org/0000-0002-5260-1110"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Keisuke Nakao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129696807","display_name":"Yusuke Nakamura","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yusuke Nakamura","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120555545","display_name":"Sebastian Zwirner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sebastian Zwirner","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036143841","display_name":"Yi-Chia Chen","orcid":"https://orcid.org/0000-0002-8321-8595"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi-Chia Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072118811","display_name":"Hiroyuki Otomo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hiroyuki Otomo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109372838","display_name":"Hiroki Ouchi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hiroki Ouchi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129679599","display_name":"Daisuke Kawahara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daisuke Kawahara","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5129641638"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25055928,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"12","first_page":"9747","last_page":"9756"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.923799991607666,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.923799991607666,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.010099999606609344,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.004600000102072954,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/geospatial-analysis","display_name":"Geospatial analysis","score":0.7491000294685364},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.6917999982833862},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6912999749183655},{"id":"https://openalex.org/keywords/underpinning","display_name":"Underpinning","score":0.6626999974250793},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6105999946594238},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5296000242233276}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7883999943733215},{"id":"https://openalex.org/C9770341","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Geospatial analysis","level":2,"score":0.7491000294685364},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.6917999982833862},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6912999749183655},{"id":"https://openalex.org/C2780871342","wikidata":"https://www.wikidata.org/wiki/Q7883752","display_name":"Underpinning","level":2,"score":0.6626999974250793},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6105999946594238},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5296000242233276},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4392000138759613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4104999899864197},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.35920000076293945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.328000009059906},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.30390000343322754},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29330000281333923},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C2985733770","wikidata":"https://www.wikidata.org/wiki/Q1233007","display_name":"Travel time","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.25029999017715454},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i12.37938","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i12.37938","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i12.37938","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i12.37938","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.5236195921897888}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,126,164],"multimodal":[3],"large":[4],"language":[5],"models":[6,155],"(MLLMs)":[7],"have":[8],"significantly":[9],"enhanced":[10],"video":[11,22],"understanding":[12],"capabilities,":[13],"opening":[14],"new":[15],"possibilities":[16],"for":[17,48],"practical":[18],"applications.":[19,166],"Yet":[20],"current":[21],"benchmarks":[23,154],"focus":[24],"largely":[25,40],"on":[26],"indoor":[27],"scenes":[28],"or":[29],"short-range":[30],"outdoor":[31],"activities,":[32],"leaving":[33],"the":[34,107,136],"challenges":[35],"associated":[36],"with":[37],"long-distance":[38],"travel":[39,73],"unexplored.":[41],"Mastering":[42],"extended":[43,114],"geospatial-temporal":[44,90],"trajectories":[45],"is":[46],"critical":[47],"next-generation":[49],"MLLMs,":[50,97],"underpinning":[51],"real-world":[52],"tasks":[53],"such":[54],"as":[55,79],"embodied-AI":[56],"planning":[57],"and":[58,86,116],"navigation.":[59],"To":[60],"bridge":[61],"this":[62],"gap,":[63],"we":[64,120,128],"present":[65],"VIR-Bench,":[66],"a":[67,80,130],"novel":[68],"benchmark":[69],"consisting":[70],"of":[71,109],"200":[72],"videos":[74,111],"that":[75,95,112,134,148],"frames":[76],"itinerary":[77,145],"reconstruction":[78],"challenging":[81],"task":[82],"designed":[83],"to":[84,102],"evaluate":[85],"push":[87],"forward":[88],"MLLMs'":[89],"intelligence.":[91],"Experimental":[92],"results":[93],"reveal":[94],"state-of-the-art":[96],"including":[98],"proprietary":[99],"ones,":[100],"struggle":[101],"achieve":[103],"high":[104],"scores,":[105],"underscoring":[106],"difficulty":[108],"handling":[110],"span":[113],"spatial":[115],"temporal":[117],"scales.":[118],"Moreover,":[119],"conduct":[121],"an":[122],"in-depth":[123],"case":[124],"study":[125],"which":[127],"develop":[129],"prototype":[131],"travel-planning":[132],"agent":[133],"leverages":[135],"insights":[137],"gained":[138],"from":[139],"VIR-Bench.":[140],"The":[141],"agent\u2019s":[142],"markedly":[143],"improved":[144],"recommendations":[146],"verify":[147],"our":[149],"evaluation":[150],"protocol":[151],"not":[152],"only":[153],"effectively":[156],"but":[157],"also":[158],"translates":[159],"into":[160],"concrete":[161],"performance":[162],"gains":[163],"user-facing":[165]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
