{"id":"https://openalex.org/W4411446408","doi":"https://doi.org/10.1109/tnnls.2025.3577292","title":"SFAN: Selective Filter and Alignment Network for Cross-Modal Retrieval","display_name":"SFAN: Selective Filter and Alignment Network for Cross-Modal Retrieval","publication_year":2025,"publication_date":"2025-06-19","ids":{"openalex":"https://openalex.org/W4411446408","doi":"https://doi.org/10.1109/tnnls.2025.3577292","pmid":"https://pubmed.ncbi.nlm.nih.gov/40536846"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3577292","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3577292","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102665049","display_name":"Yongle Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I25355098","display_name":"Chang'an University","ror":"https://ror.org/05mxya461","country_code":"CN","type":"education","lineage":["https://openalex.org/I25355098"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yongle Huang","raw_affiliation_strings":["School of Information Engineering, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, China","institution_ids":["https://openalex.org/I25355098"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110814983","display_name":"Zedong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I25355098","display_name":"Chang'an University","ror":"https://ror.org/05mxya461","country_code":"CN","type":"education","lineage":["https://openalex.org/I25355098"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zedong Liu","raw_affiliation_strings":["School of Information Engineering, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, China","institution_ids":["https://openalex.org/I25355098"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103070400","display_name":"Shijie Sun","orcid":"https://orcid.org/0000-0003-4043-8448"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shijie Sun","raw_affiliation_strings":["School of Data Science and Artificial Intelligence, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, WA, China"],"affiliations":[{"raw_affiliation_string":"School of Data Science and Artificial Intelligence, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, WA, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071688309","display_name":"Ningning Cui","orcid":"https://orcid.org/0000-0002-1940-625X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ningning Cui","raw_affiliation_strings":["School of Data Science and Artificial Intelligence, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, WA, China"],"affiliations":[{"raw_affiliation_string":"School of Data Science and Artificial Intelligence, Chang&#x2019;an University, Xi&#x2019;an, Shaanxi, WA, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100380474","display_name":"Jianxin Li","orcid":"https://orcid.org/0000-0002-9059-330X"},"institutions":[{"id":"https://openalex.org/I12079687","display_name":"Edith Cowan University","ror":"https://ror.org/05jhnwe22","country_code":"AU","type":"education","lineage":["https://openalex.org/I12079687"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jianxin Li","raw_affiliation_strings":["School of Business and Law, Edith Cowan University, Joondalup, WA, Australia"],"affiliations":[{"raw_affiliation_string":"School of Business and Law, Edith Cowan University, Joondalup, WA, Australia","institution_ids":["https://openalex.org/I12079687"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102665049"],"corresponding_institution_ids":["https://openalex.org/I25355098"],"apc_list":null,"apc_paid":null,"fwci":1.2784,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.80934789,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"10","first_page":"18792","last_page":"18804"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7815617322921753},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.6290994882583618},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6214231252670288},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5948229432106018},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5774002075195312},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5611103177070618},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.45085376501083374},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4165477156639099},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4164751470088959},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.27903372049331665},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.27865728735923767},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1115146279335022}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7815617322921753},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.6290994882583618},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6214231252670288},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5948229432106018},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5774002075195312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5611103177070618},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.45085376501083374},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4165477156639099},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4164751470088959},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27903372049331665},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.27865728735923767},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1115146279335022},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3577292","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3577292","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40536846","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40536846","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G193029910","display_name":null,"funder_award_id":"2023YFB4301800","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G258990425","display_name":null,"funder_award_id":"300102404101","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W1538281814","https://openalex.org/W1861492603","https://openalex.org/W1933349210","https://openalex.org/W2185175083","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2425121537","https://openalex.org/W2510661833","https://openalex.org/W2745461083","https://openalex.org/W2765440071","https://openalex.org/W2896457183","https://openalex.org/W2909384861","https://openalex.org/W2950162424","https://openalex.org/W2962845008","https://openalex.org/W2962964995","https://openalex.org/W2964303913","https://openalex.org/W2965373594","https://openalex.org/W2967957126","https://openalex.org/W2988823324","https://openalex.org/W2994818707","https://openalex.org/W3005971801","https://openalex.org/W3035212740","https://openalex.org/W3035552787","https://openalex.org/W3035588244","https://openalex.org/W3035605030","https://openalex.org/W3093088671","https://openalex.org/W3118694826","https://openalex.org/W3153232703","https://openalex.org/W4214819138","https://openalex.org/W4285145317","https://openalex.org/W4307233751","https://openalex.org/W4312651322","https://openalex.org/W4313178921","https://openalex.org/W4323338501","https://openalex.org/W4360897523","https://openalex.org/W4378805087","https://openalex.org/W4385895960","https://openalex.org/W4385934203","https://openalex.org/W4386071757","https://openalex.org/W4386075572","https://openalex.org/W4386076600","https://openalex.org/W4387968136","https://openalex.org/W4390603585","https://openalex.org/W4391344842","https://openalex.org/W4392909853","https://openalex.org/W4392977092","https://openalex.org/W4400679250","https://openalex.org/W4402727787","https://openalex.org/W4402754025","https://openalex.org/W4403842425"],"related_works":["https://openalex.org/W2385859805","https://openalex.org/W3125011624","https://openalex.org/W1508631387","https://openalex.org/W2370917603","https://openalex.org/W2530972254","https://openalex.org/W2952760143","https://openalex.org/W2017776670","https://openalex.org/W2347897961","https://openalex.org/W2340870721","https://openalex.org/W627697492"],"abstract_inverted_index":{"Bridging":[0],"the":[1,85,118,133,155,164,182],"gap":[2],"between":[3],"visual":[4,30],"and":[5,42,52,88,106,131,151,167],"textual":[6,36],"modalities":[7],"effectively":[8,41,176],"has":[9],"consistently":[10],"been":[11],"a":[12,58,81,141,189],"key":[13,63,129],"challenge":[14],"in":[15,29,35,55],"cross-modal":[16,64,74,185],"retrieval.":[17,75],"Fine-grained":[18],"matching":[19],"approaches":[20],"improve":[21],"performance":[22],"by":[23,188],"precisely":[24],"aligning":[25],"salient":[26],"region":[27],"features":[28,47],"modality":[31],"with":[32],"word":[33],"embeddings":[34,147,157],"modality.":[37,114],"However,":[38],"how":[39],"to":[40,92,104,126,144,153],"efficiently":[43],"filter":[44,87,101,108],"out":[45,109],"irrelevant":[46,49,136],"(e.g.,":[48],"background":[50],"regions":[51],"nonmeaningful":[53],"prepositions)":[54],"multimodality":[56],"remains":[57],"significant":[59],"challenge.":[60],"Furthermore,":[61],"capturing":[62],"relationships":[65],"while":[66],"minimizing":[67],"misalignment":[68],"interference":[69],"is":[70],"crucial":[71],"for":[72,158],"effective":[73],"In":[76],"this":[77],"work,":[78],"we":[79,97,139],"propose":[80,98,117],"novel":[82],"approach":[83],"called":[84],"selective":[86,100,122],"alignment":[89,123],"network":[90],"(SFAN)":[91],"tackle":[93],"these":[94,146],"challenges.":[95],"First,":[96],"modality-specific":[99],"modules":[102],"(SFMs)":[103],"selectively":[105,127],"implicitly":[107],"redundant":[110],"information":[111],"within":[112],"each":[113],"We":[115],"then":[116],"state-space":[119],"models":[120],"(SSMs)-based":[121],"module":[124],"(SAM)":[125],"capture":[128],"correspondences":[130],"reduce":[132],"disturbance":[134],"of":[135],"associations.":[137],"Finally,":[138],"utilize":[140],"fusion":[142],"operation":[143],"combine":[145],"from":[148],"both":[149],"SFM":[150],"SAM":[152],"derive":[154],"final":[156],"similarity":[159],"computation.":[160],"Extensive":[161],"experiments":[162],"on":[163],"Flickr30k,":[165],"MS-COCO,":[166],"MSR-VTT":[168],"datasets":[169],"reveal":[170],"that":[171],"our":[172],"proposed":[173],"SFAN":[174],"can":[175],"learn":[177],"robust":[178],"patterns,":[179],"significantly":[180],"outperforming":[181],"state-of-the-art":[183],"(SOTA)":[184],"retrieval":[186],"methods":[187],"wide":[190],"margin.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
