{"id":"https://openalex.org/W7128697506","doi":"https://doi.org/10.1109/tits.2025.3649738","title":"CDFIT: A Transformer Using Cross-Modal Dual-Stream Feature Interaction for Multispectral Pedestrian Detection","display_name":"CDFIT: A Transformer Using Cross-Modal Dual-Stream Feature Interaction for Multispectral Pedestrian Detection","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128697506","doi":"https://doi.org/10.1109/tits.2025.3649738"},"language":null,"primary_location":{"id":"doi:10.1109/tits.2025.3649738","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2025.3649738","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102015377","display_name":"Zihao Huang","orcid":"https://orcid.org/0000-0002-4444-4514"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zihao Huang","raw_affiliation_strings":["Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021042952","display_name":"Wenshi Li","orcid":"https://orcid.org/0000-0002-6467-8449"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenshi Li","raw_affiliation_strings":["Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100721871","display_name":"Yuzhen Zhang","orcid":"https://orcid.org/0000-0002-7930-5549"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]},{"id":"https://openalex.org/I4210153519","display_name":"First Affiliated Hospital of Soochow University","ror":"https://ror.org/051jg5p78","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210153519"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuzhen Zhang","raw_affiliation_strings":["First Affiliated Hospital of Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"First Affiliated Hospital of Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682","https://openalex.org/I4210153519"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034938127","display_name":"Jiaren Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaren Guo","raw_affiliation_strings":["Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101379006","display_name":"Jianyin Zheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4403386725","display_name":"Suzhou City University","ror":"https://ror.org/025jsyk19","country_code":null,"type":"education","lineage":["https://openalex.org/I4403386725"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianyin Zheng","raw_affiliation_strings":["Suzhou City University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Suzhou City University, Suzhou, China","institution_ids":["https://openalex.org/I4403386725"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019124641","display_name":"Guang Ji","orcid":"https://orcid.org/0000-0003-0842-3676"},"institutions":[{"id":"https://openalex.org/I4210101356","display_name":"Beijing Founder Electronics (China)","ror":"https://ror.org/00nwrzz95","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210101356"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Ji","raw_affiliation_strings":["Beijing Telesound Electronics Company Ltd., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Telesound Electronics Company Ltd., Beijing, China","institution_ids":["https://openalex.org/I4210101356"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003009707","display_name":"Yanyun Tao","orcid":"https://orcid.org/0000-0002-5342-1699"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanyun Tao","raw_affiliation_strings":["Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China"],"affiliations":[{"raw_affiliation_string":"Jiangsu Intelligent Urban Rail Engineering Research Center, Soochow University, Suzhou, China","institution_ids":["https://openalex.org/I3923682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102015377"],"corresponding_institution_ids":["https://openalex.org/I3923682"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.50232432,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"27","issue":"3","first_page":"2899","last_page":"2912"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.906499981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.906499981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.03139999881386757,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.011099999770522118,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/multispectral-image","display_name":"Multispectral image","score":0.7103999853134155},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.5824000239372253},{"id":"https://openalex.org/keywords/pedestrian-detection","display_name":"Pedestrian detection","score":0.5102999806404114},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.5080000162124634},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.49779999256134033},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4713999927043915},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.46779999136924744},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46549999713897705}],"concepts":[{"id":"https://openalex.org/C173163844","wikidata":"https://www.wikidata.org/wiki/Q1761440","display_name":"Multispectral image","level":2,"score":0.7103999853134155},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6887999773025513},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6656000018119812},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.5824000239372253},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5467000007629395},{"id":"https://openalex.org/C2780156472","wikidata":"https://www.wikidata.org/wiki/Q2355550","display_name":"Pedestrian detection","level":3,"score":0.5102999806404114},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.5080000162124634},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.49779999256134033},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4713999927043915},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.46779999136924744},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46549999713897705},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.45980000495910645},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45100000500679016},{"id":"https://openalex.org/C29081049","wikidata":"https://www.wikidata.org/wiki/Q1364242","display_name":"Image stitching","level":2,"score":0.4251999855041504},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.40389999747276306},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3808000087738037},{"id":"https://openalex.org/C205606062","wikidata":"https://www.wikidata.org/wiki/Q5249645","display_name":"Decoupling (probability)","level":2,"score":0.32820001244544983},{"id":"https://openalex.org/C194995250","wikidata":"https://www.wikidata.org/wiki/Q531136","display_name":"Affordance","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.3163999915122986},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.2671000063419342},{"id":"https://openalex.org/C39927690","wikidata":"https://www.wikidata.org/wiki/Q11197","display_name":"Logarithm","level":2,"score":0.25699999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tits.2025.3649738","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tits.2025.3649738","pdf_url":null,"source":{"id":"https://openalex.org/S144771191","display_name":"IEEE Transactions on Intelligent Transportation Systems","issn_l":"1524-9050","issn":["1524-9050","1558-0016"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Intelligent Transportation Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Modality":[0],"imbalance":[1],"is":[2],"a":[3],"significant":[4],"challenge":[5],"for":[6],"multi-modal":[7],"interaction":[8,61],"at":[9],"various":[10],"depths":[11,134],"in":[12,28,64,96,114],"multispectral":[13,142],"pedestrian":[14],"detection":[15],"under":[16],"varying":[17],"illumination":[18],"environments.":[19],"To":[20],"overcome":[21],"the":[22,30,35,45,59,69,72,78,93,110,126,139],"limitations":[23],"of":[24,71,141],"current":[25],"cross":[26],"attention":[27,94,103],"addressing":[29],"modality":[31,76],"imbalance,":[32],"we":[33,81,116],"propose":[34,82],"Cross-Modal":[36],"Dual-Stream":[37],"Feature":[38],"Interaction":[39],"Transformer":[40,118],"(CDFIT).":[41],"CDFIT":[42,155],"capitalizes":[43],"on":[44,150],"Transformer\u2019s":[46],"ability":[47],"to":[48,66,77,89,122],"learn":[49],"long-range":[50],"dependencies,":[51],"extracting":[52],"global":[53],"intra-modal":[54,107],"and":[55,84,91,124,130,147],"inter-modal":[56,102],"correlations":[57],"during":[58],"feature":[60],"phase.":[62],"Crucially,":[63],"order":[65],"effectively":[67],"eliminate":[68],"interference":[70],"self-attention":[73],"within":[74],"one":[75],"alternative":[79],"one,":[80],"horizontal":[83],"vertical":[85],"correlation":[86],"decoupling":[87],"modes":[88],"divide":[90],"reassemble":[92],"maps":[95],"CDFIT.":[97],"This":[98],"facilitates":[99],"more":[100],"purified":[101],"while":[104],"preserving":[105],"relevant":[106],"self-attention,":[108],"reducing":[109],"information":[111,127],"interference.":[112],"Meanwhile,":[113],"CDFIT,":[115],"expand":[117],"into":[119],"dual-stream":[120],"pathways":[121],"align":[123],"assemble":[125],"from":[128],"RGB":[129],"thermal":[131],"modalities":[132],"across":[133],"separately,":[135],"thereby":[136],"greatly":[137],"enhancing":[138],"performance":[140,158],"object":[143],"detection.":[144],"Comprehensive":[145],"experiments":[146],"ablation":[148],"studies":[149],"benchmark":[151],"datasets":[152],"demonstrate":[153],"that":[154],"achieves":[156],"superior":[157],"compared":[159],"with":[160],"state-of-the-art":[161],"methods.":[162]},"counts_by_year":[],"updated_date":"2026-03-17T06:59:57.516163","created_date":"2026-02-13T00:00:00"}
