{"id":"https://openalex.org/W7123767570","doi":"https://doi.org/10.48550/arxiv.2601.06781","title":"AutoTour: Automatic Photo Tour Guide with Smartphones and LLMs","display_name":"AutoTour: Automatic Photo Tour Guide with Smartphones and LLMs","publication_year":2026,"publication_date":"2026-01-11","ids":{"openalex":"https://openalex.org/W7123767570","doi":"https://doi.org/10.48550/arxiv.2601.06781"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.06781","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06781","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.06781","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078174269","display_name":"Huatao Xu","orcid":"https://orcid.org/0000-0002-8289-0910"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Huatao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122918562","display_name":"Zihe Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zihe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122986579","display_name":"Zilin Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Zilin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122920665","display_name":"Baichuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Baichuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100361467","display_name":"Mo Li","orcid":"https://orcid.org/0000-0002-9580-791X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Mo","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5078174269"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.7829999923706055,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.7829999923706055,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11779999732971191,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11094","display_name":"Face Recognition and Perception","score":0.01269999984651804,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/geospatial-analysis","display_name":"Geospatial analysis","score":0.8356000185012817},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.675000011920929},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6269999742507935},{"id":"https://openalex.org/keywords/landmark","display_name":"Landmark","score":0.5709999799728394},{"id":"https://openalex.org/keywords/geotagging","display_name":"Geotagging","score":0.5170999765396118},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4738999903202057},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4361000061035156},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4332999885082245},{"id":"https://openalex.org/keywords/geocoding","display_name":"Geocoding","score":0.4327000081539154}],"concepts":[{"id":"https://openalex.org/C9770341","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Geospatial analysis","level":2,"score":0.8356000185012817},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7354999780654907},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.675000011920929},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6269999742507935},{"id":"https://openalex.org/C2780297707","wikidata":"https://www.wikidata.org/wiki/Q4895393","display_name":"Landmark","level":2,"score":0.5709999799728394},{"id":"https://openalex.org/C53605480","wikidata":"https://www.wikidata.org/wiki/Q852595","display_name":"Geotagging","level":2,"score":0.5170999765396118},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4738999903202057},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4361000061035156},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4332999885082245},{"id":"https://openalex.org/C42629822","wikidata":"https://www.wikidata.org/wiki/Q1346408","display_name":"Geocoding","level":2,"score":0.4327000081539154},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42579999566078186},{"id":"https://openalex.org/C60229501","wikidata":"https://www.wikidata.org/wiki/Q18822","display_name":"Global Positioning System","level":2,"score":0.40619999170303345},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3763999938964844},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.373199999332428},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37070000171661377},{"id":"https://openalex.org/C130745260","wikidata":"https://www.wikidata.org/wiki/Q5548542","display_name":"Geospatial PDF","level":3,"score":0.3596999943256378},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3334999978542328},{"id":"https://openalex.org/C142816647","wikidata":"https://www.wikidata.org/wiki/Q5573018","display_name":"Glyph (data visualization)","level":3,"score":0.32109999656677246},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3012000024318695},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.2946999967098236},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.27570000290870667},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.271699994802475},{"id":"https://openalex.org/C12380541","wikidata":"https://www.wikidata.org/wiki/Q58300","display_name":"Geovisualization","level":4,"score":0.27070000767707825},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.2702000141143799},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.25940001010894775},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.06781","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06781","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.06781","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06781","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,158],"present":[1],"AutoTour,":[2],"a":[3,76,174],"system":[4],"that":[5,49,79,160,181],"enhances":[6],"user":[7,98],"exploration":[8,180],"by":[9,21,144],"automatically":[10],"generating":[11],"fine-grained":[12],"landmark":[13],"annotations":[14,166],"and":[15,60,67,82,104,128,136,149,170,185],"descriptive":[16],"narratives":[17],"for":[18,167],"photos":[19,35,99],"captured":[20],"users.":[22],"The":[23,130],"key":[24],"idea":[25],"of":[26,177],"AutoTour":[27,57,161],"is":[28],"to":[29,64,152],"fuse":[30],"visual":[31,183],"features":[32,39,86,118,132],"extracted":[33],"from":[34,41],"with":[36,119],"nearby":[37],"geospatial":[38,85,121,186],"queried":[40],"open":[42,59],"matching":[43,114],"databases.":[44],"Unlike":[45],"existing":[46],"tour":[47],"applications":[48],"rely":[50],"on":[51,124,139],"pre-defined":[52],"content":[53],"or":[54],"proprietary":[55],"datasets,":[56],"leverages":[58],"extensible":[61],"data":[62],"sources":[63],"provide":[65,153],"scalable":[66],"context-aware":[68,179],"photo-based":[69],"guidance.":[70],"To":[71],"achieve":[72],"this,":[73],"we":[74],"design":[75],"training-free":[77],"pipeline":[78],"first":[80],"extracts":[81],"filters":[83],"relevant":[84],"around":[87],"the":[88,108,140],"user's":[89],"GPS":[90],"location.":[91],"It":[92],"then":[93],"detects":[94],"major":[95],"landmarks":[96],"in":[97],"through":[100],"VLM-based":[101],"feature":[102],"detection":[103],"projects":[105],"them":[106],"into":[107],"horizontal":[109],"spatial":[110],"plane.":[111],"A":[112],"geometric":[113],"algorithm":[115],"aligns":[116],"photo":[117],"corresponding":[120],"entities":[122],"based":[123],"their":[125],"estimated":[126],"distance":[127],"direction.":[129],"matched":[131],"are":[133],"subsequently":[134],"grounded":[135],"annotated":[137],"directly":[138],"original":[141],"photo,":[142],"accompanied":[143],"large":[145],"language":[146],"model-generated":[147],"textual":[148],"audio":[150],"descriptions":[151],"an":[154],"informative,":[155],"tour-like":[156],"experience.":[157],"demonstrate":[159],"can":[162],"deliver":[163],"rich,":[164],"interpretable":[165],"both":[168],"iconic":[169],"lesser-known":[171],"landmarks,":[172],"enabling":[173],"new":[175],"form":[176],"interactive,":[178],"bridges":[182],"perception":[184],"understanding.":[187]},"counts_by_year":[],"updated_date":"2026-01-14T23:44:37.837170","created_date":"2026-01-14T00:00:00"}
