{"id":"https://openalex.org/W4417409686","doi":"https://doi.org/10.48550/arxiv.2509.03032","title":"Background Matters Too: A Language-Enhanced Adversarial Framework for Person Re-Identification","display_name":"Background Matters Too: A Language-Enhanced Adversarial Framework for Person Re-Identification","publication_year":2025,"publication_date":"2025-09-03","ids":{"openalex":"https://openalex.org/W4417409686","doi":"https://doi.org/10.48550/arxiv.2509.03032"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2509.03032","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.03032","pdf_url":"https://arxiv.org/pdf/2509.03032","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2509.03032","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091265420","display_name":"Kaicong Huang","orcid":"https://orcid.org/0000-0001-9101-4376"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Kaicong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090225991","display_name":"Talha Azfar","orcid":"https://orcid.org/0000-0002-1293-5036"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Azfar, Talha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109345429","display_name":"Jack Reilly","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reilly, Jack M.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120826602","display_name":"Thomas Guggisberg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guggisberg, Thomas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5049143775","display_name":"Ruimin Ke","orcid":"https://orcid.org/0000-0001-9139-6765"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ke, Ruimin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5091265420"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8560000061988831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.8560000061988831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.03629999980330467,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.026200000196695328,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.7135999798774719},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7010999917984009},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.6984999775886536},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.597100019454956},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5666999816894531},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5271999835968018},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.45879998803138733},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4113999903202057},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.38260000944137573}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7454000115394592},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.7135999798774719},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7010999917984009},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.6984999775886536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.67330002784729},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.597100019454956},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5666999816894531},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5271999835968018},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.45879998803138733},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4113999903202057},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3856000006198883},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.38260000944137573},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3763999938964844},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.34529998898506165},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.33660000562667847},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31439998745918274},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3021000027656555},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2985999882221222},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.2935999929904938},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.27459999918937683},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2727999985218048},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C32653426","wikidata":"https://www.wikidata.org/wiki/Q3813641","display_name":"Background subtraction","level":3,"score":0.2639999985694885},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.25}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2509.03032","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.03032","pdf_url":"https://arxiv.org/pdf/2509.03032","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2509.03032","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.03032","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.03032","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2509.03032","pdf_url":"https://arxiv.org/pdf/2509.03032","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Person":[0],"re-identification":[1],"faces":[2],"two":[3,135,197,200],"core":[4],"challenges:":[5],"precisely":[6],"locating":[7],"the":[8,20,71,90,130,134,156,172,179,205,210],"foreground":[9,67,91,117,166,192],"target":[10,21,105],"while":[11,102,161],"suppressing":[12],"background":[13,75,84,100,119,168,185],"noise":[14],"and":[15,33,44,118,142,151,167,187,199,207],"extracting":[16],"fine-grained":[17],"features":[18,153,169],"from":[19],"region.":[22],"Numerous":[23],"visual-only":[24],"approaches":[25],"address":[26],"these":[27],"issues":[28],"by":[29,53,78],"partitioning":[30],"an":[31,111,139],"image":[32],"applying":[34],"attention":[35,189],"modules,":[36],"yet":[37],"they":[38,63],"rely":[39],"on":[40,66,104,196],"costly":[41],"manual":[42],"annotations":[43],"struggle":[45],"with":[46,213],"complex":[47],"occlusions.":[48],"Recent":[49],"multimodal":[50],"methods,":[51],"motivated":[52],"CLIP,":[54],"introduce":[55],"semantic":[56],"cues":[57],"to":[58,98,170,181],"guide":[59],"visual":[60,150],"understanding.":[61],"However,":[62],"focus":[64],"solely":[65],"information,":[68],"but":[69],"overlook":[70],"potential":[72],"value":[73],"of":[74,209,220],"cues.":[76,193],"Inspired":[77],"human":[79],"perception,":[80],"we":[81,137,148],"argue":[82],"that":[83,114,154,215],"semantics":[85,92,158],"are":[86],"as":[87,89,95],"important":[88],"in":[93],"ReID,":[94],"humans":[96],"tend":[97],"eliminate":[99],"distractions":[101],"focusing":[103],"appearance.":[106],"Therefore,":[107],"this":[108],"paper":[109],"proposes":[110],"end-to-end":[112],"framework":[113],"jointly":[115],"models":[116],"information":[120],"within":[121],"a":[122],"dual-branch":[123],"cross-modal":[124],"feature":[125],"extraction":[126],"pipeline.":[127],"To":[128],"help":[129],"network":[131],"distinguish":[132],"between":[133,165],"domains,":[136,160],"propose":[138],"intra-semantic":[140],"alignment":[141],"inter-semantic":[143],"adversarial":[144],"learning":[145],"strategy.":[146],"Specifically,":[147],"align":[149],"textual":[152],"share":[155],"same":[157],"across":[159],"simultaneously":[162],"penalizing":[163],"similarity":[164],"enhance":[171,188],"network's":[173],"discriminative":[174],"power.":[175],"This":[176],"strategy":[177],"drives":[178],"model":[180],"actively":[182],"suppress":[183],"noisy":[184],"regions":[186],"toward":[190],"identity-relevant":[191],"Comprehensive":[194],"experiments":[195],"holistic":[198],"occluded":[201],"ReID":[202],"benchmarks":[203],"demonstrate":[204],"effectiveness":[206],"generality":[208],"proposed":[211],"method,":[212],"results":[214],"match":[216],"or":[217],"surpass":[218],"those":[219],"current":[221],"state-of-the-art":[222],"approaches.":[223]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
