{"id":"https://openalex.org/W7116928289","doi":"https://doi.org/10.1145/3785482","title":"CVAF: A CLIP-Based View-Consistent Alignment Framework for Aerial-Ground Person Re-Identification","display_name":"CVAF: A CLIP-Based View-Consistent Alignment Framework for Aerial-Ground Person Re-Identification","publication_year":2025,"publication_date":"2025-12-23","ids":{"openalex":"https://openalex.org/W7116928289","doi":"https://doi.org/10.1145/3785482"},"language":"en","primary_location":{"id":"doi:10.1145/3785482","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3785482","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121077987","display_name":"Dongxu Mao","orcid":null},"institutions":[{"id":"https://openalex.org/I78675632","display_name":"Beijing Information Science & Technology University","ror":"https://ror.org/04xnqep60","country_code":"CN","type":"education","lineage":["https://openalex.org/I78675632"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dongxu Mao","raw_affiliation_strings":["School of Computer Science, Beijing Information Science and Technology University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-6134-1620","affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing Information Science and Technology University, Beijing, China","institution_ids":["https://openalex.org/I78675632"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045234139","display_name":"Shangzhi Teng","orcid":"https://orcid.org/0000-0001-7098-9932"},"institutions":[{"id":"https://openalex.org/I78675632","display_name":"Beijing Information Science & Technology University","ror":"https://ror.org/04xnqep60","country_code":"CN","type":"education","lineage":["https://openalex.org/I78675632"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shangzhi Teng","raw_affiliation_strings":["School of Computer Science, Beijing Information Science and Technology University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7098-9932","affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing Information Science and Technology University, Beijing, China","institution_ids":["https://openalex.org/I78675632"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113823067","display_name":"Xueqiang Lv","orcid":null},"institutions":[{"id":"https://openalex.org/I78675632","display_name":"Beijing Information Science & Technology University","ror":"https://ror.org/04xnqep60","country_code":"CN","type":"education","lineage":["https://openalex.org/I78675632"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueqiang Lyu","raw_affiliation_strings":["School of Computer Science, Beijing Information Science and Technology University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1422-0560","affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing Information Science and Technology University, Beijing, China","institution_ids":["https://openalex.org/I78675632"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5121077987"],"corresponding_institution_ids":["https://openalex.org/I78675632"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.60918331,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"22","issue":"3","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.7954000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.7954000234603882,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.06319999694824219,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.05000000074505806,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/viewpoints","display_name":"Viewpoints","score":0.6632000207901001},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6197999715805054},{"id":"https://openalex.org/keywords/clutter","display_name":"Clutter","score":0.6022999882698059},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5019999742507935},{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.4869999885559082},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.41819998621940613},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3882000148296356}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9039999842643738},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.6632000207901001},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6197999715805054},{"id":"https://openalex.org/C132094186","wikidata":"https://www.wikidata.org/wiki/Q641585","display_name":"Clutter","level":3,"score":0.6022999882698059},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5580999851226807},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5019999742507935},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.4869999885559082},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.41819998621940613},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3882000148296356},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.37869998812675476},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3662000000476837},{"id":"https://openalex.org/C2778493491","wikidata":"https://www.wikidata.org/wiki/Q7449072","display_name":"Semantic matching","level":3,"score":0.36559998989105225},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3127000033855438},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.28299999237060547},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2815000116825104},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2802000045776367},{"id":"https://openalex.org/C2778180026","wikidata":"https://www.wikidata.org/wiki/Q18378163","display_name":"Semantic heterogeneity","level":4,"score":0.27160000801086426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3785482","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3785482","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","score":0.6250017881393433,"display_name":"Sustainable cities and communities"}],"awards":[{"id":"https://openalex.org/G5496950214","display_name":null,"funder_award_id":"62202061 and 62171043","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1982925187","https://openalex.org/W2204750386","https://openalex.org/W2795758732","https://openalex.org/W2963047834","https://openalex.org/W2963842104","https://openalex.org/W2968374502","https://openalex.org/W3034580371","https://openalex.org/W3035186652","https://openalex.org/W3175823695","https://openalex.org/W3198377975","https://openalex.org/W4312361652","https://openalex.org/W4388145471","https://openalex.org/W4389318018","https://openalex.org/W4389920963","https://openalex.org/W4390970425","https://openalex.org/W4402716228","https://openalex.org/W4402754290","https://openalex.org/W4403790874"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,33,96,125,134,201],"increasing":[2],"adoption":[3],"of":[4,127,180],"UAV":[5],"platforms":[6],"in":[7],"areas":[8],"such":[9],"as":[10,22],"public":[11],"safety":[12],"and":[13,43,152,174,187],"smart":[14],"cities,":[15],"Aerial-Ground":[16],"Person":[17],"Re-Identification":[18],"(AGPReID)":[19],"has":[20],"emerged":[21],"a":[23,85,118,139],"crucial":[24],"yet":[25],"highly":[26],"challenging":[27],"task,":[28],"garnering":[29],"growing":[30],"interest":[31],"from":[32],"research":[34],"community.":[35],"While":[36],"existing":[37],"approaches":[38],"have":[39],"leveraged":[40],"identity":[41,77],"attributes":[42],"viewpoint":[44],"disentanglement":[45],"strategies":[46],"to":[47,75,104,163],"improve":[48],"cross-view":[49],"matching,":[50],"their":[51],"heavy":[52],"reliance":[53],"on":[54,192],"prior":[55],"knowledge":[56],"often":[57],"compromises":[58],"model":[59],"generalization.":[60],"Furthermore,":[61],"some":[62],"methods":[63],"that":[64,123,144,197],"explicitly":[65,164],"separate":[66],"viewpoints":[67],"may":[68],"unintentionally":[69],"discard":[70],"identity-related,":[71],"view-invariant":[72],"features,":[73],"leading":[74],"incomplete":[76],"representations.":[78],"To":[79,109],"address":[80],"these":[81],"limitations,":[82],"we":[83,116,137],"propose":[84],"CLIP-based":[86],"View-Consistent":[87],"Alignment":[88],"Framework":[89],"(CVAF)":[90],"with":[91,130],"two":[92],"training":[93],"stages.":[94],"In":[95,133],"first":[97],"stage,":[98,136],"learnable":[99],"text":[100],"tokens":[101,149,176],"are":[102],"employed":[103],"represent":[105],"identity-aware":[106],"textual":[107,172],"descriptions.":[108],"promote":[110],"consistent":[111],"alignment":[112],"across":[113],"varying":[114],"viewpoints,":[115],"introduce":[117],"Text":[119],"Consistency":[120],"Loss":[121],"(TCL)":[122],"regularizes":[124],"stability":[126],"text-token":[128],"interactions":[129],"multi-view":[131],"images.":[132],"second":[135],"present":[138],"Semantic":[140],"Filtering":[141],"Module":[142],"(SFM)":[143],"jointly":[145],"modulates":[146],"image":[147,181],"patch":[148],"along":[150],"spatial":[151,160],"channel":[153],"dimensions.":[154],"A":[155],"text-guided":[156],"cross-attention":[157],"mechanism":[158],"generates":[159],"attention":[161],"maps":[162],"emphasize":[165],"identity-relevant":[166],"regions,":[167],"while":[168],"semantic":[169],"matching":[170],"between":[171],"features":[173],"visual":[175],"enables":[177],"adaptive":[178],"reweighting":[179],"representations,":[182],"effectively":[183],"suppressing":[184],"background":[185],"clutter":[186],"view-specific":[188],"noise.":[189],"Extensive":[190],"experiments":[191],"multiple":[193],"AGPReID":[194],"datasets":[195],"demonstrate":[196],"our":[198],"CVAF":[199],"outperforms":[200],"state-of-the-art":[202],"methods.":[203]},"counts_by_year":[],"updated_date":"2026-03-01T06:05:34.837733","created_date":"2025-12-23T00:00:00"}
