{"id":"https://openalex.org/W4416249452","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228230","title":"CMAN: Compact Modality Alignment Network With Dual Stream Transformer For Visible-Infrared Person Re-identification","display_name":"CMAN: Compact Modality Alignment Network With Dual Stream Transformer For Visible-Infrared Person Re-identification","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416249452","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228230"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228230","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228230","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091892920","display_name":"Xuyang Song","orcid":null},"institutions":[{"id":"https://openalex.org/I4210094876","display_name":"Ministry of Education","ror":"https://ror.org/00q919b81","country_code":"SA","type":"government","lineage":["https://openalex.org/I4210094876"]}],"countries":["SA"],"is_corresponding":true,"raw_author_name":"Xuyang Song","raw_affiliation_strings":["Xiamen University,Key Laboratory of Multimedia Trusted Perception and Efficient Computing Ministry of Education of China,P.R. China,361005"],"affiliations":[{"raw_affiliation_string":"Xiamen University,Key Laboratory of Multimedia Trusted Perception and Efficient Computing Ministry of Education of China,P.R. China,361005","institution_ids":["https://openalex.org/I4210094876"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032612432","display_name":"Pingyang Dai","orcid":"https://orcid.org/0000-0001-9780-271X"},"institutions":[{"id":"https://openalex.org/I4210094876","display_name":"Ministry of Education","ror":"https://ror.org/00q919b81","country_code":"SA","type":"government","lineage":["https://openalex.org/I4210094876"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Pingyang Dai","raw_affiliation_strings":["Xiamen University,Key Laboratory of Multimedia Trusted Perception and Efficient Computing Ministry of Education of China,P.R. China,361005"],"affiliations":[{"raw_affiliation_string":"Xiamen University,Key Laboratory of Multimedia Trusted Perception and Efficient Computing Ministry of Education of China,P.R. China,361005","institution_ids":["https://openalex.org/I4210094876"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5091892920"],"corresponding_institution_ids":["https://openalex.org/I4210094876"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37183667,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.858299970626831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.858299970626831,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.06830000132322311,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.014700000174343586,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6338000297546387},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5766000151634216},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.5205000042915344},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5123999714851379},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.40860000252723694},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.40290001034736633},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.3806999921798706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.761900007724762},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6338000297546387},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.579800009727478},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5766000151634216},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.5205000042915344},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5123999714851379},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4876999855041504},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.40860000252723694},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.40290001034736633},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.3806999921798706},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.3517000079154968},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.29120001196861267},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C12362212","wikidata":"https://www.wikidata.org/wiki/Q728435","display_name":"Linear subspace","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.26269999146461487}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228230","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228230","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W2596603442","https://openalex.org/W2777534232","https://openalex.org/W2808260522","https://openalex.org/W2904949947","https://openalex.org/W2954773727","https://openalex.org/W2962858109","https://openalex.org/W2963805953","https://openalex.org/W2967515867","https://openalex.org/W2970390221","https://openalex.org/W2971016330","https://openalex.org/W2985033611","https://openalex.org/W2996878574","https://openalex.org/W2997877744","https://openalex.org/W2998508940","https://openalex.org/W2998792609","https://openalex.org/W3004990178","https://openalex.org/W3033235266","https://openalex.org/W3034494316","https://openalex.org/W3034519219","https://openalex.org/W3034727830","https://openalex.org/W3035673257","https://openalex.org/W3107848599","https://openalex.org/W3139438386","https://openalex.org/W3176633985","https://openalex.org/W3194065175","https://openalex.org/W3202750592","https://openalex.org/W3202788649","https://openalex.org/W3204075450","https://openalex.org/W3207905476","https://openalex.org/W3210946531","https://openalex.org/W4214736485","https://openalex.org/W4312802153","https://openalex.org/W4382240079","https://openalex.org/W4386065398","https://openalex.org/W4390872891","https://openalex.org/W4393159343","https://openalex.org/W4402952238","https://openalex.org/W4410583163","https://openalex.org/W4415795272"],"related_works":[],"abstract_inverted_index":{"Visible-infrared":[0],"person":[1],"re-identification":[2],"(VI-ReID)":[3],"aims":[4],"to":[5,56,86,122,148,162,210,237],"match":[6],"pedestrian":[7],"images":[8,44],"captured":[9],"by":[10,41,171],"visible":[11,27],"and":[12,23,30,49,61,107,117,126,153,186,212,252],"infrared":[13,31],"cameras.":[14],"The":[15],"main":[16],"challenge":[17],"lies":[18],"in":[19,136,145,159,195,199,216,233],"the":[20,81,109,112,137,174,190,200,205,230,234,239,248,256,259],"severe":[21],"cross-modality":[22,39,59],"intra-modality":[24],"differences":[25],"between":[26],"light":[28],"(VIS)":[29],"(IR)":[32],"images.":[33],"Most":[34],"current":[35,235],"CNN-based":[36],"methods":[37],"achieve":[38],"retrieval":[40],"mapping":[42],"two":[43],"into":[45,115],"a":[46,73,88,99,155],"high-dimensional":[47],"subspace":[48],"exploiting":[50],"modality-shared":[51],"features,":[52],"making":[53],"it":[54,227],"difficult":[55],"mine":[57],"diverse":[58,185],"representations":[60],"effectively":[62],"capture":[63],"global":[64],"image":[65,131],"dependencies.":[66],"To":[67],"address":[68],"these":[69,160],"issues,":[70],"we":[71,96,140,203],"propose":[72],"compact":[74],"modality":[75,90,147,236],"alignment":[76,91],"network":[77,102],"(CMAN)":[78],"based":[79,103],"on":[80,104,247],"dual":[82,100],"stream":[83],"ViT":[84],"architecture":[85],"explore":[87],"novel":[89],"approach":[92],"for":[93],"VI-ReID.":[94],"Specifically,":[95],"first":[97],"deploy":[98],"deep":[101],"vision":[105],"transformer":[106],"divide":[108],"layers":[110],"of":[111,129,176,192,225,242,258],"self-attention":[113],"mechanism":[114],"heterogeneous":[116,138],"isomorphic":[118,201],"modules,":[119],"allowing":[120],"us":[121],"extract":[123],"modality-specific":[124,193],"features":[125,128,194,224],"shared":[127,223],"each":[130,146,164,169,177,182,196],"at":[132],"different":[133,217],"stages.":[134],"Then,":[135],"module,":[139],"use":[141,204],"multiple":[142,150],"class":[143,165,178,214,231],"tokens":[144,215],"represent":[149],"embedding":[151,183],"spaces":[152,161],"apply":[154],"Dynamic":[156],"Controller":[157],"(DC)":[158],"push":[163],"token":[166,232],"away":[167],"from":[168],"other":[170],"adaptively":[172],"adjusting":[173],"weight":[175],"token,":[179],"which":[180],"makes":[181],"space":[184],"compact,":[187],"thereby":[188],"improving":[189],"discrimination":[191],"modality.":[197,244],"Finally,":[198],"part,":[202],"Token":[206],"Permutation":[207],"(TP)":[208],"module":[209],"permute":[211],"concatenate":[213],"modalities.":[218],"Not":[219],"only":[220],"helps":[221],"align":[222],"modalities,":[226],"also":[228],"allows":[229],"perceive":[238],"local":[240],"details":[241],"another":[243],"Extensive":[245],"experiments":[246],"public":[249],"SYSU-MM01,":[250],"RegDB,":[251],"LLCM":[253],"datasets":[254],"demonstrate":[255],"superiority":[257],"proposed":[260],"CMAN":[261],"over":[262],"state-of-the-art":[263],"methods.":[264]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
