{"id":"https://openalex.org/W4414085675","doi":"https://doi.org/10.1109/tnnls.2025.3605657","title":"Temporal Modeling With Frozen Vision\u2013Language Foundation Models for Parameter-Efficient Text\u2013Video Retrieval","display_name":"Temporal Modeling With Frozen Vision\u2013Language Foundation Models for Parameter-Efficient Text\u2013Video Retrieval","publication_year":2025,"publication_date":"2025-09-09","ids":{"openalex":"https://openalex.org/W4414085675","doi":"https://doi.org/10.1109/tnnls.2025.3605657","pmid":"https://pubmed.ncbi.nlm.nih.gov/40924521"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3605657","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3605657","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002748588","display_name":"Leqi Shen","orcid":"https://orcid.org/0000-0002-7742-9142"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Leqi Shen","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-7742-9142","affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tianxiang Hao","orcid":"https://orcid.org/0000-0002-1952-6083"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianxiang Hao","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-1952-6083","affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060002786","display_name":"Tao He","orcid":"https://orcid.org/0000-0002-5796-2177"},"institutions":[{"id":"https://openalex.org/I917184967","display_name":"Bank of China","ror":"https://ror.org/02mt4s337","country_code":"CN","type":"other","lineage":["https://openalex.org/I917184967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao He","raw_affiliation_strings":["GRG Banking Equipment Company Ltd., Guangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"GRG Banking Equipment Company Ltd., Guangzhou, China","institution_ids":["https://openalex.org/I917184967"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100347986","display_name":"Yifeng Zhang","orcid":"https://orcid.org/0000-0003-1205-9475"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifeng Zhang","raw_affiliation_strings":["JD.com Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD.com Inc., Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054904119","display_name":"Pengzhang Liu","orcid":"https://orcid.org/0000-0002-6031-5245"},"institutions":[{"id":"https://openalex.org/I4210103986","display_name":"Jingdong (China)","ror":"https://ror.org/01dkjkq64","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210103986"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengzhang Liu","raw_affiliation_strings":["JD.com Inc., Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"JD.com Inc., Beijing, China","institution_ids":["https://openalex.org/I4210103986"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051149140","display_name":"Sicheng Zhao","orcid":"https://orcid.org/0000-0001-5843-6411"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sicheng Zhao","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5843-6411","affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046605531","display_name":"Jungong Han","orcid":"https://orcid.org/0000-0003-4361-956X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jungong Han","raw_affiliation_strings":["Department of Automation, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-4361-956X","affiliations":[{"raw_affiliation_string":"Department of Automation, Beijing National Research Center for Information Science and Technology (BNRist), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057732142","display_name":"Guiguang Ding","orcid":"https://orcid.org/0000-0003-0137-9975"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guiguang Ding","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0137-9975","affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology (BNRist) and the School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5002748588"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":1.1332,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82285714,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"36","issue":"10","first_page":"17527","last_page":"17540"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.983299970626831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.7742999792098999},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5501999855041504},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5374000072479248},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34360000491142273},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3140999972820282},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.29660001397132874}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7760000228881836},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.7742999792098999},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5888000130653381},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5501999855041504},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5374000072479248},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4302999973297119},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C77277458","wikidata":"https://www.wikidata.org/wiki/Q1969246","display_name":"Temporal database","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C3261483","wikidata":"https://www.wikidata.org/wiki/Q119565","display_name":"Frame rate","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26969999074935913}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3605657","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3605657","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40924521","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40924521","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2365988345","display_name":null,"funder_award_id":"62525103","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G338250432","display_name":null,"funder_award_id":"62441235","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4144755178","display_name":null,"funder_award_id":"62571294","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7091651887","display_name":null,"funder_award_id":"62021002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1893116441","https://openalex.org/W1905882502","https://openalex.org/W1933349210","https://openalex.org/W2079735306","https://openalex.org/W2425121537","https://openalex.org/W2565656701","https://openalex.org/W2885775891","https://openalex.org/W2897439619","https://openalex.org/W2963017553","https://openalex.org/W2963916161","https://openalex.org/W2984008963","https://openalex.org/W3033740891","https://openalex.org/W3035356601","https://openalex.org/W3043840704","https://openalex.org/W3130796238","https://openalex.org/W3168640669","https://openalex.org/W3198377975","https://openalex.org/W3204588463","https://openalex.org/W4214633470","https://openalex.org/W4285606530","https://openalex.org/W4307233751","https://openalex.org/W4312299780","https://openalex.org/W4312310776","https://openalex.org/W4312372711","https://openalex.org/W4312480274","https://openalex.org/W4312651322","https://openalex.org/W4312999114","https://openalex.org/W4386071547","https://openalex.org/W4386076265","https://openalex.org/W4386076289","https://openalex.org/W4386076600","https://openalex.org/W4388854793","https://openalex.org/W4389317971","https://openalex.org/W4390873054","https://openalex.org/W4390873082","https://openalex.org/W4390874542","https://openalex.org/W4390874668","https://openalex.org/W4395056490","https://openalex.org/W4402703062","https://openalex.org/W4402704596","https://openalex.org/W4402809401","https://openalex.org/W4406903228","https://openalex.org/W4413259066"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2381393187","https://openalex.org/W2332779545","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2358060160","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Temporal":[0],"modeling":[1,57,76,82],"plays":[2],"an":[3,187],"important":[4],"role":[5],"in":[6,161,191],"the":[7,11,66,91,119,167,197,203],"effective":[8],"adaption":[9],"of":[10,145,169,196],"powerful":[12],"pretrained":[13],"text-image":[14],"foundation":[15,49,61],"model":[16,65],"into":[17],"text-video":[18],"retrieval.":[19],"However,":[20],"existing":[21,163],"methods":[22,208],"often":[23],"rely":[24],"on":[25,172,182],"additional":[26],"heavy":[27,44],"trainable":[28,143],"modules,":[29],"such":[30,43],"as":[31,108,133],"transformer":[32],"or":[33],"BiLSTM,":[34],"which":[35],"are":[36,156],"inefficient.":[37],"In":[38],"contrast,":[39],"we":[40,54],"avoid":[41],"introducing":[42],"components":[45],"by":[46],"leveraging":[47],"frozen":[48,59,85,101,125],"models.":[50],"To":[51],"this":[52],"end,":[53],"propose":[55],"temporal":[56,67,75,81,113,116],"with":[58,69,179,193,209],"vision-language":[60],"models":[62],"(TFVL)":[63],"to":[64,104,128],"dynamics":[68],"fixed":[70],"encoders.":[71],"Specifically,":[72],"text":[73,86,102],"encoder":[74,80,103,127],"(TextTemp)":[77],"and":[78,87,94,152,176],"image":[79,88,126],"(ImageTemp)":[83],"apply":[84],"encoders":[89],"within":[90,111],"video":[92,95],"head":[93],"backbone,":[96],"respectively.":[97],"TextTemp":[98],"uses":[99,123],"a":[100,112,124,134,149],"interpret":[105],"frame":[106,131],"representations":[107],"\"visual":[109],"words\"":[110],"\"sentence,\"":[114],"capturing":[115],"dependencies.":[117],"On":[118],"other":[120,162],"hand,":[121],"ImageTemp":[122],"treat":[129],"all":[130],"tokens":[132],"unified":[135],"visual":[136],"entity,":[137],"learning":[138],"spatiotemporal":[139],"information.":[140],"The":[141],"total":[142],"parameters":[144],"our":[146,170,184],"method,":[147],"comprising":[148],"lightweight":[150],"projection":[151],"several":[153],"prompt":[154],"tokens,":[155],"significantly":[157,210],"fewer":[158,211],"than":[159],"those":[160],"methods.":[164],"We":[165],"evaluate":[166],"effectiveness":[168],"method":[171],"MSR-VTT,":[173,183],"DiDeMo,":[174],"ActivityNet,":[175],"LSMDC.":[177],"Compared":[178],"full":[180],"fine-tuning":[181],"TFVL":[185,205],"achieves":[186],"average":[188],"3.25%":[189],"gain":[190],"R@1":[192],"merely":[194],"0.35%":[195],"parameters.":[198,212],"Extensive":[199],"experiments":[200],"demonstrate":[201],"that":[202],"proposed":[204],"outperforms":[206],"state-of-the-art":[207]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
