{"id":"https://openalex.org/W4403096679","doi":"https://doi.org/10.1145/3698399","title":"Motion-Aware Self-Supervised RGBT Tracking with Multi-Modality Hierarchical Transformers","display_name":"Motion-Aware Self-Supervised RGBT Tracking with Multi-Modality Hierarchical Transformers","publication_year":2024,"publication_date":"2024-10-03","ids":{"openalex":"https://openalex.org/W4403096679","doi":"https://doi.org/10.1145/3698399"},"language":"en","primary_location":{"id":"doi:10.1145/3698399","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3698399","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068715611","display_name":"Shenglan Li","orcid":"https://orcid.org/0000-0001-9245-5915"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shenglan Li","raw_affiliation_strings":["School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023822600","display_name":"Rui Yao","orcid":"https://orcid.org/0000-0003-2734-915X"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rui Yao","raw_affiliation_strings":["School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025079159","display_name":"Yong Zhou","orcid":"https://orcid.org/0000-0001-6207-0299"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Zhou","raw_affiliation_strings":["School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033904998","display_name":"Hancheng Zhu","orcid":"https://orcid.org/0000-0002-5418-9879"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hancheng Zhu","raw_affiliation_strings":["School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090052616","display_name":"Jiaqi Zhao","orcid":"https://orcid.org/0000-0002-3564-5090"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaqi Zhao","raw_affiliation_strings":["School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081933126","display_name":"Zhiwen Shao","orcid":"https://orcid.org/0000-0002-9383-8384"},"institutions":[{"id":"https://openalex.org/I25757504","display_name":"China University of Mining and Technology","ror":"https://ror.org/01xt2dr21","country_code":"CN","type":"education","lineage":["https://openalex.org/I25757504"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwen Shao","raw_affiliation_strings":["School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, Xuzhou, China and Mine Digitization Engineering Research Center of the Ministry of Education, Xuzhou, China","institution_ids":["https://openalex.org/I25757504"]},{"raw_affiliation_string":"School of Computer Sciences and Technology, China University of Mining and Technology, and Mine Digitization Engineering Research Center of the Ministry of Education, China","institution_ids":["https://openalex.org/I25757504"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109797436","display_name":"Abdulmotaleb El Saddik","orcid":null},"institutions":[{"id":"https://openalex.org/I153718931","display_name":"University of Ottawa","ror":"https://ror.org/03c4mmv16","country_code":"CA","type":"education","lineage":["https://openalex.org/I153718931"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Abdulmotaleb El Saddik","raw_affiliation_strings":["School of Electrical Engineering and Computer Science, University of Ottawa, Ottawa, ON, Canada","School of Electrical Engineering and Computer Science, University of Ottawa, Canada"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, University of Ottawa, Ottawa, ON, Canada","institution_ids":["https://openalex.org/I153718931"]},{"raw_affiliation_string":"School of Electrical Engineering and Computer Science, University of Ottawa, Canada","institution_ids":["https://openalex.org/I153718931"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5068715611"],"corresponding_institution_ids":["https://openalex.org/I25757504"],"apc_list":null,"apc_paid":null,"fwci":1.6935,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.85786467,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"20","issue":"12","first_page":"1","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8874062895774841},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5690391659736633},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5191332697868347},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.47525808215141296},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4373118579387665},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3686627149581909}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8874062895774841},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5690391659736633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5191332697868347},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.47525808215141296},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4373118579387665},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3686627149581909},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3698399","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3698399","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5157351965","display_name":null,"funder_award_id":"62172417, 62272461, 62101555, 62106268, and 62472424","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W182940129","https://openalex.org/W1857884451","https://openalex.org/W2016802777","https://openalex.org/W2060451775","https://openalex.org/W2071984725","https://openalex.org/W2117539524","https://openalex.org/W2119539043","https://openalex.org/W2132103241","https://openalex.org/W2138478117","https://openalex.org/W2154889144","https://openalex.org/W2194775991","https://openalex.org/W2294296888","https://openalex.org/W2527415613","https://openalex.org/W2557641257","https://openalex.org/W2752782242","https://openalex.org/W2765667535","https://openalex.org/W2799058067","https://openalex.org/W2897436422","https://openalex.org/W2902767594","https://openalex.org/W2927438889","https://openalex.org/W2963188742","https://openalex.org/W2963905288","https://openalex.org/W2964423614","https://openalex.org/W2966759264","https://openalex.org/W2997131652","https://openalex.org/W2998756268","https://openalex.org/W3002567850","https://openalex.org/W3034915791","https://openalex.org/W3035694605","https://openalex.org/W3099681648","https://openalex.org/W3101990647","https://openalex.org/W3109466111","https://openalex.org/W3110562975","https://openalex.org/W3126855404","https://openalex.org/W3127317646","https://openalex.org/W3132864630","https://openalex.org/W3153607844","https://openalex.org/W3155938136","https://openalex.org/W3158472981","https://openalex.org/W3183152796","https://openalex.org/W3214586131","https://openalex.org/W4205537101","https://openalex.org/W4205757850","https://openalex.org/W4225574120","https://openalex.org/W4226126595","https://openalex.org/W4283808043","https://openalex.org/W4285603016","https://openalex.org/W4286974493","https://openalex.org/W4312668764","https://openalex.org/W4312751983","https://openalex.org/W4312771325","https://openalex.org/W4313555022","https://openalex.org/W4319001020","https://openalex.org/W4386066081","https://openalex.org/W4386066282","https://openalex.org/W4386075603","https://openalex.org/W4386076504","https://openalex.org/W4386083135","https://openalex.org/W4386954241","https://openalex.org/W4389627669","https://openalex.org/W4393154017","https://openalex.org/W4393154934","https://openalex.org/W4396505795","https://openalex.org/W4399435597"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398","https://openalex.org/W2775347418"],"abstract_inverted_index":{"Supervised":[0],"RGBT":[1,16,50],"(SRGBT)":[2],"tracking":[3,18,26,43,52,60,231],"tasks":[4],"need":[5],"both":[6,95],"expensive":[7],"and":[8,65,97,134,161,217],"time-consuming":[9],"annotations.":[10],"Therefore,":[11],"the":[12,57,99,103,113,117,125,150,155,163,167,171,186,198,224,235],"implementation":[13],"of":[14,102,116,127,170,179,200,228,237],"Self-Supervised":[15],"(SSRGBT)":[17],"methods":[19,27,61],"has":[20],"become":[21],"increasingly":[22],"important.":[23],"Straightforward":[24],"SSRGBT":[25],"use":[28],"pseudo-labels":[29,34,64],"for":[30,76,87],"tracking,":[31,220],"but":[32],"inaccurate":[33,135,201],"can":[35],"lead":[36],"to":[37,55,70,94,110,123,190,195,213],"object":[38,51,219],"drift,":[39],"which":[40],"severely":[41],"affects":[42],"performance.":[44],"This":[45,90],"article":[46],"proposes":[47],"a":[48,81,143],"self-supervised":[49],"method":[53],"(S2OTFormer)":[54],"bridge":[56],"gap":[58],"between":[59],"supervised":[62],"under":[63],"ground":[66],"truth":[67],"labels.":[68],"Firstly,":[69],"provide":[71],"more":[72,215],"robust":[73],"appearance":[74,136],"features":[75,160],"motion":[77,128,133,152,168,177],"cues,":[78],"we":[79,141,203],"introduce":[80,142],"multi-modality":[82],"hierarchical":[83],"transformer":[84],"(MHT)":[85],"module":[86,91,105,222],"feature":[88],"fusion.":[89],"allocates":[92],"weights":[93],"modalities":[96],"strengthens":[98],"expressive":[100],"capability":[101],"MHT":[104],"through":[106,244],"multiple":[107],"nonlinear":[108],"layers":[109],"fully":[111],"utilize":[112],"complementary":[114],"information":[115,137],"two":[118],"modalities.":[119],"Secondly,":[120],"in":[121],"order":[122],"solve":[124],"problems":[126],"blur":[129],"caused":[130,138],"by":[131,139,184],"camera":[132],"pseudo-labels,":[140,202],"motion-aware":[144],"mechanism":[145],"(MAM).":[146],"The":[147,176,253],"MAM":[148],"extracts":[149],"average":[151],"vectors":[153,169,178],"from":[154],"previous":[156],"multi-frame":[157],"search":[158,173],"frame":[159,174],"constructs":[162],"consistency":[164],"loss":[165],"with":[166],"current":[172],"features.":[175],"inter-frame":[180,187],"objects":[181],"are":[182],"obtained":[183],"reusing":[185],"attention":[188],"map":[189],"predict":[191],"coordinate":[192],"positions.":[193],"Finally,":[194],"further":[196],"reduce":[197],"effect":[199],"propose":[204],"an":[205],"Attention-Based":[206],"Multi-Scale":[207],"Enhancement":[208],"Module.":[209],"By":[210],"introducing":[211],"cross-attention":[212],"achieve":[214],"precise":[216],"accurate":[218],"this":[221],"overcomes":[223],"receptive":[225],"field":[226],"limitations":[227],"traditional":[229],"CNN":[230],"heads.":[232],"We":[233],"demonstrate":[234],"effectiveness":[236],"S2OTFormer":[238],"on":[239],"four":[240],"large-scale":[241],"public":[242],"datasets":[243],"extensive":[245],"comparisons":[246],"as":[247,249],"well":[248],"numerous":[250],"ablation":[251],"experiments.":[252],"source":[254],"code":[255],"is":[256],"available":[257],"at":[258],"https://github.com/LiShenglana/S2OTFormer":[259],".":[260]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-10-10T00:00:00"}
