{"id":"https://openalex.org/W4417248874","doi":"https://doi.org/10.1109/tpami.2025.3642821","title":"Mettle: Meta-Token Learning for Memory-Efficient Audio-Visual Adaptation","display_name":"Mettle: Meta-Token Learning for Memory-Efficient Audio-Visual Adaptation","publication_year":2025,"publication_date":"2025-12-11","ids":{"openalex":"https://openalex.org/W4417248874","doi":"https://doi.org/10.1109/tpami.2025.3642821","pmid":"https://pubmed.ncbi.nlm.nih.gov/41379918"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2025.3642821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3642821","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047303316","display_name":"Jinxing Zhou","orcid":"https://orcid.org/0000-0001-6402-7593"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]},{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]},{"id":"https://openalex.org/I4210137491","display_name":"National Science Centre","ror":"https://ror.org/03ha2q922","country_code":"PL","type":"government","lineage":["https://openalex.org/I4210137491"]}],"countries":["AE","CN","PL"],"is_corresponding":false,"raw_author_name":"Jinxing Zhou","raw_affiliation_strings":["Hefei Comprehensive National Science Center, Hefei University of Technology (HFUT), Hefei, China","Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates"],"raw_orcid":"https://orcid.org/0000-0001-6402-7593","affiliations":[{"raw_affiliation_string":"Hefei Comprehensive National Science Center, Hefei University of Technology (HFUT), Hefei, China","institution_ids":["https://openalex.org/I16365422","https://openalex.org/I4210137491"]},{"raw_affiliation_string":"Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100461813","display_name":"Zhihui Li","orcid":"https://orcid.org/0000-0001-9642-8009"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihui Li","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0001-9642-8009","affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041593911","display_name":"Yongqiang Yu","orcid":"https://orcid.org/0000-0001-9778-7783"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Yongqiang Yu","raw_affiliation_strings":["Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068136447","display_name":"Yanghao Zhou","orcid":"https://orcid.org/0009-0001-9401-4432"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yanghao Zhou","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National University of Singapore, Singapore"],"raw_orcid":"https://orcid.org/0009-0001-9401-4432","affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088611866","display_name":"Ruohao Guo","orcid":"https://orcid.org/0000-0002-1091-272X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruohao Guo","raw_affiliation_strings":["School of Intelligence Science and Technology, Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1091-272X","affiliations":[{"raw_affiliation_string":"School of Intelligence Science and Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338853","display_name":"Guangyao Li","orcid":"https://orcid.org/0000-0002-2179-8555"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangyao Li","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102015118","display_name":"Yuxin Mao","orcid":"https://orcid.org/0000-0002-2837-2290"},"institutions":[{"id":"https://openalex.org/I4210111163","display_name":"Shanghai Open University","ror":"https://ror.org/027r7gj11","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111163"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Mao","raw_affiliation_strings":["OpenNLPLab, Shanghai, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"OpenNLPLab, Shanghai, China","institution_ids":["https://openalex.org/I4210111163"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103127046","display_name":"Mingfei Han","orcid":"https://orcid.org/0000-0003-0040-6177"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Mingfei Han","raw_affiliation_strings":["Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates"],"raw_orcid":"https://orcid.org/0000-0003-0040-6177","affiliations":[{"raw_affiliation_string":"Department of Computer Vision, Mohamed bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, United Arab Emirates","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034967388","display_name":"Xiaojun Chang","orcid":"https://orcid.org/0000-0002-7778-8807"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojun Chang","raw_affiliation_strings":["Department of Electronic Engineering and Information Science, School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering and Information Science, School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100377147","display_name":"Meng Wang","orcid":"https://orcid.org/0000-0002-3094-7735"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]},{"id":"https://openalex.org/I4210137491","display_name":"National Science Centre","ror":"https://ror.org/03ha2q922","country_code":"PL","type":"government","lineage":["https://openalex.org/I4210137491"]}],"countries":["CN","PL"],"is_corresponding":false,"raw_author_name":"Meng Wang","raw_affiliation_strings":["Hefei Comprehensive National Science Center, Hefei University of Technology (HFUT), Hefei, China","Hefei University of Technology (HFUT) and Hefei Comprehensive National Science Center, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-3094-7735","affiliations":[{"raw_affiliation_string":"Hefei Comprehensive National Science Center, Hefei University of Technology (HFUT), Hefei, China","institution_ids":["https://openalex.org/I16365422","https://openalex.org/I4210137491"]},{"raw_affiliation_string":"Hefei University of Technology (HFUT) and Hefei Comprehensive National Science Center, Hefei, China","institution_ids":["https://openalex.org/I16365422","https://openalex.org/I4210137491"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.064,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82978831,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"48","issue":"4","first_page":"4222","last_page":"4238"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.38989999890327454,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.38989999890327454,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2282000035047531,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.061799999326467514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7046999931335449},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.45170000195503235},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4223000109195709},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.4196999967098236},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4133000075817108},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.3937000036239624},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.3862999975681305},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3587999939918518}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7944999933242798},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7046999931335449},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6101999878883362},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.45170000195503235},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4223000109195709},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.4196999967098236},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4133000075817108},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3937000036239624},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.3862999975681305},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.384799987077713},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36230000853538513},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3587999939918518},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3334999978542328},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2825999855995178},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26750001311302185}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2025.3642821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2025.3642821","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:41379918","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41379918","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1070641079","display_name":null,"funder_award_id":"U25A20530","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2746168205","display_name":null,"funder_award_id":"62573399","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8480155402","display_name":null,"funder_award_id":"72188101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W2593116425","https://openalex.org/W2931433835","https://openalex.org/W2962865004","https://openalex.org/W2963115079","https://openalex.org/W2964109005","https://openalex.org/W2970925270","https://openalex.org/W3048939150","https://openalex.org/W3093287838","https://openalex.org/W3094502228","https://openalex.org/W3110606395","https://openalex.org/W3118120400","https://openalex.org/W3170088426","https://openalex.org/W3174770825","https://openalex.org/W3175335326","https://openalex.org/W3175514052","https://openalex.org/W3198571508","https://openalex.org/W4205991051","https://openalex.org/W4214604251","https://openalex.org/W4226206782","https://openalex.org/W4226442948","https://openalex.org/W4282920689","https://openalex.org/W4309660795","https://openalex.org/W4312310776","https://openalex.org/W4312349930","https://openalex.org/W4312380001","https://openalex.org/W4312415534","https://openalex.org/W4312583638","https://openalex.org/W4312651322","https://openalex.org/W4312864739","https://openalex.org/W4312980231","https://openalex.org/W4313123347","https://openalex.org/W4385245566","https://openalex.org/W4386065620","https://openalex.org/W4386065882","https://openalex.org/W4386071547","https://openalex.org/W4386075647","https://openalex.org/W4386076600","https://openalex.org/W4386113246","https://openalex.org/W4387969495","https://openalex.org/W4388192054","https://openalex.org/W4390872773","https://openalex.org/W4393154284","https://openalex.org/W4393160420","https://openalex.org/W4397008817","https://openalex.org/W4399474077","https://openalex.org/W4400890495","https://openalex.org/W4402112401","https://openalex.org/W4402703041","https://openalex.org/W4402716319","https://openalex.org/W4402727025","https://openalex.org/W4402753597","https://openalex.org/W4402753794","https://openalex.org/W4402754293","https://openalex.org/W4402781078","https://openalex.org/W4402904133","https://openalex.org/W4402904188","https://openalex.org/W4402961769","https://openalex.org/W4403386295","https://openalex.org/W4403791349","https://openalex.org/W4403791445","https://openalex.org/W4403791622","https://openalex.org/W4403791730","https://openalex.org/W4403947222","https://openalex.org/W4403998639","https://openalex.org/W4404970580","https://openalex.org/W4408361498","https://openalex.org/W4409364991","https://openalex.org/W4409365689","https://openalex.org/W4409367415","https://openalex.org/W4410609966","https://openalex.org/W4413146674","https://openalex.org/W4413147087","https://openalex.org/W4415799111","https://openalex.org/W7133194086","https://openalex.org/W7133238718"],"related_works":[],"abstract_inverted_index":{"Mainstream":[0],"research":[1],"in":[2,39,58,153,233],"audio-visual":[3,129,192,196,207],"learning":[4],"has":[5],"focused":[6],"on":[7,238],"designing":[8],"task-specific":[9,178],"expert":[10],"models,":[11],"primarily":[12],"implemented":[13],"through":[14,62,94],"sophisticated":[15],"multimodal":[16],"fusion":[17],"approaches.":[18],"Recently,":[19],"a":[20,59,68,116,145,211],"few":[21,69],"efforts":[22],"have":[23],"aimed":[24],"to":[25,91,127,151,187,229],"develop":[26],"more":[27],"task-independent":[28],"or":[29,158],"universal":[30],"audiovisual":[31,41,240],"embedding":[32],"networks,":[33],"encoding":[34],"advanced":[35],"representations":[36],"for":[37,102,121],"use":[38],"various":[40],"downstream":[42,128],"tasks.":[43,130],"This":[44,169],"is":[45],"typically":[46],"achieved":[47],"by":[48,162],"fine-tuning":[49],"large":[50],"pretrained":[51,75,124,174],"transformers,":[52],"such":[53,64,190,205],"as":[54,65,191,206],"Swin-V2-L":[55],"and":[56,118,177,195,220,250,257],"HTS-AT,":[57],"parameter-efficient":[60],"manner":[61],"techniques":[63],"tuning":[66],"only":[67],"adapter":[70],"layers":[71],"inserted":[72],"into":[73,166],"the":[74,95,135,140,155,218,225],"transformer":[76,97,125,141,164,227],"backbone.":[77],"Although":[78],"these":[79],"methods":[80],"are":[81],"parameter-efficient,":[82],"they":[83],"suffer":[84],"from":[85,224],"significant":[86],"training":[87,251],"memory":[88,248],"consumption":[89],"due":[90],"gradient":[92],"backpropagation":[93],"deep":[96],"backbones,":[98],"which":[99,216],"limits":[100],"accessibility":[101],"researchers":[103],"with":[104],"constrained":[105],"computational":[106],"resources.":[107],"In":[108],"this":[109],"paper,":[110],"we":[111,209],"present":[112],"Meta-Token":[113,212],"Learning":[114],"(Mettle),":[115],"simple":[117],"memory-efficient":[119],"method":[120,245],"adapting":[122],"large-scale":[123],"models":[126],"Instead":[131],"of":[132,139],"sequentially":[133],"modifying":[134],"output":[136],"feature":[137,231],"distribution":[138],"backbone,":[142],"Mettle":[143],"utilizes":[144,217],"lightweight":[146],"Layer-Centric":[147],"Distillation":[148],"(LCD)":[149],"module":[150],"distill":[152],"parallel":[154],"intact":[156],"audio":[157,219],"visual":[159,221],"features":[160],"embedded":[161],"each":[163],"layer":[165,228],"compact":[167],"meta-tokens.":[168],"distillation":[170],"process":[171],"considers":[172],"both":[173],"knowledge":[175],"preservation":[176],"adaptation.":[179],"The":[180],"obtained":[181],"meta-tokens":[182,222],"can":[183],"be":[184],"directly":[185],"applied":[186],"classification":[188],"tasks,":[189,204],"event":[193],"localization":[194],"video":[197],"parsing.":[198],"To":[199],"further":[200],"support":[201],"fine-grained":[202],"segmentation":[203],"segmentation,":[208],"introduce":[210],"Injection":[213],"(MTI)":[214],"module,":[215],"distilled":[223],"top":[226],"guide":[230],"adaptation":[232],"earlier":[234],"layers.":[235],"Extensive":[236],"experiments":[237],"multiple":[239],"benchmarks":[241],"demonstrate":[242],"that":[243],"our":[244],"significantly":[246],"reduces":[247],"usage":[249],"time":[252],"while":[253],"maintaining":[254],"parameter":[255],"efficiency":[256],"competitive":[258],"accuracy.":[259]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-11T00:00:00"}
