{"id":"https://openalex.org/W7125152578","doi":"https://doi.org/10.48550/arxiv.2601.12729","title":"DC-VLAQ: Query-Residual Aggregation for Robust Visual Place Recognition","display_name":"DC-VLAQ: Query-Residual Aggregation for Robust Visual Place Recognition","publication_year":2026,"publication_date":"2026-01-19","ids":{"openalex":"https://openalex.org/W7125152578","doi":"https://doi.org/10.48550/arxiv.2601.12729"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.12729","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12729","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.12729","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123516232","display_name":"Hanyu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Hanyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122360008","display_name":"Zhihao Zhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhan, Zhihao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123539677","display_name":"Yuhang Ming","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ming, Yuhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123476644","display_name":"Liang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122861412","display_name":"Dibo Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Dibo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Civera, Javier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Civera, Javier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5106276250","display_name":"Wanzeng Kong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kong, Wanzeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.5120999813079834,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.5120999813079834,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.19499999284744263,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11829999834299088,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8499000072479248},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5192000269889832},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5174999833106995},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49160000681877136},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4765999913215637},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.47429999709129333},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4729999899864197},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.4702000021934509},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.446399986743927}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8499000072479248},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7283999919891357},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6567000150680542},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5192000269889832},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5174999833106995},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49160000681877136},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4765999913215637},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.47429999709129333},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4729999899864197},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.4702000021934509},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.446399986743927},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4262999892234802},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4065000116825104},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3774000108242035},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.34380000829696655},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.3212999999523163},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3142000138759613},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2815000116825104},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.2782999873161316},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2782000005245209},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.2581000030040741}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.12729","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12729","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.12729","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12729","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.76995849609375,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"One":[0],"of":[1,67,87,128,156],"the":[2,47,65,85,107,126,154],"central":[3],"challenges":[4,64],"in":[5,106,150],"visual":[6,30],"place":[7],"recognition":[8],"(VPR)":[9],"is":[10],"learning":[11],"a":[12,43,80,98,118,133],"robust":[13,91],"global":[14,70,92,135],"representation":[15],"that":[16,83,103,138,175],"remains":[17],"discriminative":[18,158],"under":[19,186],"large":[20],"viewpoint":[21],"changes,":[22],"illumination":[23],"variations,":[24],"and":[25,90,153,172,181,190],"severe":[26],"domain":[27,188],"shifts.":[28],"While":[29],"foundation":[31],"models":[32],"(VFMs)":[33],"provide":[34],"strong":[35,179],"local":[36,140],"features,":[37],"most":[38],"existing":[39,68],"methods":[40],"rely":[41],"on":[42,162],"single":[44],"model,":[45],"overlooking":[46],"complementary":[48,57,88,101,113],"cues":[49],"offered":[50],"by":[51,142],"different":[52],"VFMs.":[53],"However,":[54],"exploiting":[55],"such":[56],"information":[58],"inevitably":[59],"alters":[60],"token":[61],"distributions,":[62],"which":[63],"stability":[66,152],"query-based":[69],"aggregation":[71,136],"schemes.":[72],"To":[73],"address":[74],"these":[75],"challenges,":[76],"we":[77,95,124],"propose":[78,125],"DC-VLAQ,":[79],"representation-centric":[81],"framework":[82],"integrates":[84],"fusion":[86,102],"VFMs":[89],"aggregation.":[93],"Specifically,":[94],"first":[96],"introduce":[97],"lightweight":[99],"residual-guided":[100],"anchors":[104],"representations":[105],"DINOv2":[108],"feature":[109],"space":[110],"while":[111],"injecting":[112],"semantics":[114],"from":[115],"CLIP":[116],"through":[117],"learned":[119],"residual":[120,144],"correction.":[121],"In":[122],"addition,":[123],"Vector":[127],"Local":[129],"Aggregated":[130],"Queries":[131],"(VLAQ),":[132],"query--residual":[134],"scheme":[137],"encodes":[139],"tokens":[141],"their":[143],"responses":[145],"to":[146],"learnable":[147],"queries,":[148],"resulting":[149],"improved":[151],"preservation":[155],"fine-grained":[157],"cues.":[159],"Extensive":[160],"experiments":[161],"standard":[163],"VPR":[164],"benchmarks,":[165],"including":[166],"Pitts30k,":[167],"Tokyo24/7,":[168],"MSLS,":[169],"Nordland,":[170],"SPED,":[171],"AmsterTime,":[173],"demonstrate":[174],"DC-VLAQ":[176],"consistently":[177],"outperforms":[178],"baselines":[180],"achieves":[182],"state-of-the-art":[183],"performance,":[184],"particularly":[185],"challenging":[187],"shifts":[189],"long-term":[191],"appearance":[192],"changes.":[193]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-01-22T00:00:00"}
