{"id":"https://openalex.org/W4415366572","doi":"https://doi.org/10.1109/tmm.2025.3623526","title":"Scale-Aware Attention and Multi-Modal Prompt Learning With Fusion Adapter for RGBT Tracking","display_name":"Scale-Aware Attention and Multi-Modal Prompt Learning With Fusion Adapter for RGBT Tracking","publication_year":2025,"publication_date":"2025-10-20","ids":{"openalex":"https://openalex.org/W4415366572","doi":"https://doi.org/10.1109/tmm.2025.3623526"},"language":null,"primary_location":{"id":"doi:10.1109/tmm.2025.3623526","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3623526","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114216756","display_name":"Xiang Liu","orcid":"https://orcid.org/0009-0007-2091-7624"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiang Liu","raw_affiliation_strings":["School of Information, Yunnan University, Kunming, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Yunnan University, Kunming, China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021895719","display_name":"Haiyan Li","orcid":"https://orcid.org/0000-0003-3193-1687"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiyan Li","raw_affiliation_strings":["School of Information, Yunnan University, Kunming, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Yunnan University, Kunming, China","institution_ids":["https://openalex.org/I189210763"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051706630","display_name":"Victor S. Sheng","orcid":"https://orcid.org/0000-0003-4960-174X"},"institutions":[{"id":"https://openalex.org/I12315562","display_name":"Texas Tech University","ror":"https://ror.org/0405mnx93","country_code":"US","type":"education","lineage":["https://openalex.org/I12315562"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Victor Sheng","raw_affiliation_strings":["Department of Computer Science, Texas Tech University, Lubbock, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Texas Tech University, Lubbock, TX, USA","institution_ids":["https://openalex.org/I12315562"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065855286","display_name":"Yujun Ma","orcid":"https://orcid.org/0000-0003-2733-8813"},"institutions":[{"id":"https://openalex.org/I4210115515","display_name":"Nanyang Institute of Technology","ror":"https://ror.org/0203c2755","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210115515"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yujun Ma","raw_affiliation_strings":["School of Computer and Software, Nanyang Institute of Technology, Nanyang, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Software, Nanyang Institute of Technology, Nanyang, China","institution_ids":["https://openalex.org/I4210115515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004719842","display_name":"Xiaoguo Liang","orcid":null},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoguo Liang","raw_affiliation_strings":["School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Automation, Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101514927","display_name":"Guanbo Wang","orcid":"https://orcid.org/0000-0001-8210-8805"},"institutions":[{"id":"https://openalex.org/I189210763","display_name":"Yunnan University","ror":"https://ror.org/0040axw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I189210763"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanbo Wang","raw_affiliation_strings":["School of Information, Yunnan University, Kunming, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Yunnan University, Kunming, China","institution_ids":["https://openalex.org/I189210763"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5114216756"],"corresponding_institution_ids":["https://openalex.org/I189210763"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15711034,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"28","issue":null,"first_page":"344","last_page":"359"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9696999788284302,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9670000076293945,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5663999915122986},{"id":"https://openalex.org/keywords/bittorrent-tracker","display_name":"BitTorrent tracker","score":0.5485000014305115},{"id":"https://openalex.org/keywords/adapter","display_name":"Adapter (computing)","score":0.49230000376701355},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.47690001130104065},{"id":"https://openalex.org/keywords/eye-tracking","display_name":"Eye tracking","score":0.47510001063346863},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.446399986743927},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.36890000104904175},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.3499999940395355},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.34209999442100525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8299000263214111},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6468999981880188},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5663999915122986},{"id":"https://openalex.org/C57501372","wikidata":"https://www.wikidata.org/wiki/Q2021268","display_name":"BitTorrent tracker","level":3,"score":0.5485000014305115},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.49230000376701355},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.49160000681877136},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.47690001130104065},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.47510001063346863},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.446399986743927},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.36890000104904175},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.3499999940395355},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3370000123977661},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.33649998903274536},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.30160000920295715},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.3012000024318695},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.2847999930381775},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2833999991416931},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26460000872612},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.25200000405311584},{"id":"https://openalex.org/C207347870","wikidata":"https://www.wikidata.org/wiki/Q371174","display_name":"Gesture","level":2,"score":0.25189998745918274},{"id":"https://openalex.org/C2982962833","wikidata":"https://www.wikidata.org/wiki/Q17092450","display_name":"Information fusion","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2025.3623526","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2025.3623526","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W2527415613","https://openalex.org/W2789137832","https://openalex.org/W2909946038","https://openalex.org/W2945948323","https://openalex.org/W2963188742","https://openalex.org/W2963905288","https://openalex.org/W2996575194","https://openalex.org/W2997131652","https://openalex.org/W2998756268","https://openalex.org/W3002567850","https://openalex.org/W3099671582","https://openalex.org/W3101990647","https://openalex.org/W3110562975","https://openalex.org/W3132864630","https://openalex.org/W3153607844","https://openalex.org/W3158472981","https://openalex.org/W3183904268","https://openalex.org/W3187284461","https://openalex.org/W3209394392","https://openalex.org/W3212995679","https://openalex.org/W3214116948","https://openalex.org/W4283808043","https://openalex.org/W4287556358","https://openalex.org/W4289752563","https://openalex.org/W4304098539","https://openalex.org/W4312651322","https://openalex.org/W4312668764","https://openalex.org/W4312751983","https://openalex.org/W4320008877","https://openalex.org/W4321195219","https://openalex.org/W4362654293","https://openalex.org/W4380884766","https://openalex.org/W4382999123","https://openalex.org/W4386075603","https://openalex.org/W4386075647","https://openalex.org/W4386083135","https://openalex.org/W4386275716","https://openalex.org/W4386453815","https://openalex.org/W4387272128","https://openalex.org/W4391824769","https://openalex.org/W4392449443","https://openalex.org/W4392931239","https://openalex.org/W4393148493","https://openalex.org/W4393154934","https://openalex.org/W4393159185","https://openalex.org/W4393171245","https://openalex.org/W4396505795","https://openalex.org/W4396714092","https://openalex.org/W4401163825","https://openalex.org/W4401210525","https://openalex.org/W4402568752","https://openalex.org/W4402702990","https://openalex.org/W4402754150","https://openalex.org/W4409346692"],"related_works":[],"abstract_inverted_index":{"Fusing":[0],"visible":[1],"(RGB)":[2],"and":[3,41,75,131,142,188,199],"thermal":[4],"(T)":[5],"images":[6],"for":[7],"RGBT":[8,55,64,179],"tracking":[9,39,65,181],"has":[10],"received":[11],"growing":[12],"interest":[13],"in":[14,51,98,164],"the":[15,24,27,43,52,93,96,99,113,156,159,165,172,192],"field":[16,53],"of":[17,26,54,95,101,158,167],"computer":[18],"vision.":[19],"However,":[20],"how":[21],"to":[22,29,37,91,134,154,161],"improve":[23],"robustness":[25],"tracker":[28,97],"target":[30,102],"scale":[31,103],"variety,":[32],"effectively":[33],"apply":[34],"visual":[35,137],"prompts":[36,138],"multimodal":[38,44,71,117,168],"tasks,":[40],"enhance":[42,92],"fusion":[45,77,170],"effectiveness":[46],"are":[47],"still":[48],"urgent":[49],"challenges":[50],"tracking.":[56],"To":[57],"this":[58,60],"purpose,":[59],"work":[61],"proposes":[62],"an":[63],"framework":[66],"integrating":[67],"scale-aware":[68,83],"dilation":[69,84,110],"attention,":[70],"prompt":[72,118,145],"interaction":[73,119],"learning,":[74],"cross-":[76],"adapter,":[78],"named":[79],"MPANet.":[80],"Firstly,":[81],"a":[82,116,148],"attention":[85,130,133],"(SADA)":[86],"module":[87,122],"is":[88,123,152],"put":[89],"forward":[90],"flexibility":[94],"presence":[100],"variations":[104],"by":[105],"embedding":[106],"convolutions":[107],"with":[108],"different":[109,140,162],"rates":[111],"into":[112],"self-attention.":[114],"Subsequently,":[115],"learning":[120],"(MPIL)":[121],"constructed,":[124],"which":[125],"combines":[126],"global":[127],"token":[128],"adaptive":[129],"spatial":[132],"efficiently":[135],"learn":[136],"from":[139],"modalities":[141,163],"achieve":[143],"intermodal":[144],"interactions.":[146],"Finally,":[147],"cross-fusion":[149],"adapter":[150,173],"(CFA)":[151],"developed":[153],"facilitate":[155],"adaptability":[157],"network":[160],"process":[166],"information":[169],"through":[171],"mechanism.":[174],"Extensive":[175],"experiments":[176],"on":[177],"public":[178],"benchmark":[180],"datasets":[182],"such":[183],"as":[184],"GTOT,":[185],"RGBT234,":[186],"LasHeR":[187],"VTUAV":[189],"demonstrate":[190],"that":[191],"proposed":[193],"method":[194],"outperforms":[195],"existing":[196],"advanced":[197],"trackers":[198],"achieves":[200],"state-of-the-art":[201],"performance.":[202]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-21T00:00:00"}
