{"id":"https://openalex.org/W4405778808","doi":"https://doi.org/10.1109/tmm.2024.3521729","title":"VLAB: Enhancing Video Language Pretraining by Feature Adapting and Blending","display_name":"VLAB: Enhancing Video Language Pretraining by Feature Adapting and Blending","publication_year":2024,"publication_date":"2024-12-25","ids":{"openalex":"https://openalex.org/W4405778808","doi":"https://doi.org/10.1109/tmm.2024.3521729"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3521729","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521729","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101777772","display_name":"Xingjian He","orcid":"https://orcid.org/0000-0001-5396-6253"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xingjian He","raw_affiliation_strings":["Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101953192","display_name":"Sihan Chen","orcid":"https://orcid.org/0000-0002-9453-6265"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sihan Chen","raw_affiliation_strings":["Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017393243","display_name":"Fan Ma","orcid":"https://orcid.org/0000-0002-4131-1222"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Ma","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Zhejiang, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012273973","display_name":"Zhicheng Huang","orcid":"https://orcid.org/0000-0003-0399-9668"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhicheng Huang","raw_affiliation_strings":["University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668699","display_name":"Xiaojie Jin","orcid":"https://orcid.org/0000-0002-7850-1353"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaojie Jin","raw_affiliation_strings":["Bytedance Inc., Beijing, China","Bytedance Inc"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Bytedance Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008547683","display_name":"Zikang Liu","orcid":"https://orcid.org/0000-0002-8947-7053"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zikang Liu","raw_affiliation_strings":["Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016681542","display_name":"Dongmei Fu","orcid":"https://orcid.org/0000-0003-3918-9448"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongmei Fu","raw_affiliation_strings":["University of Science and Technology Beijing, Beijing, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005421447","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-0512-880X"},"institutions":[{"id":"https://openalex.org/I168879160","display_name":"Zhejiang University of Science and Technology","ror":"https://ror.org/05mx0wr29","country_code":"CN","type":"education","lineage":["https://openalex.org/I168879160"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["College of Computer Science and Technology, Zhejiang University, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Zhejiang University, Zhejiang, China","institution_ids":["https://openalex.org/I168879160"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108392430","display_name":"Jing Liu","orcid":"https://orcid.org/0000-0003-0903-9131"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Liu","raw_affiliation_strings":["Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Laboratory of Cognition and Decision Intelligence for Complex Systems, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100668696","display_name":"Jiashi Feng","orcid":"https://orcid.org/0000-0001-6843-0064"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiashi Feng","raw_affiliation_strings":["Bytedance Inc., Beijing, China","Bytedance Inc"],"affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Bytedance Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5101777772"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210094879"],"apc_list":null,"apc_paid":null,"fwci":1.4699,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.84197483,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"27","issue":null,"first_page":"2168","last_page":"2180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9775000214576721,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8485822081565857},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6214044690132141},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6117360591888428},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.5021002292633057},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5006420612335205},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4625508189201355},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.37584495544433594},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.34813833236694336},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.336980402469635},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09696745872497559}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8485822081565857},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6214044690132141},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6117360591888428},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.5021002292633057},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5006420612335205},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4625508189201355},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.37584495544433594},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.34813833236694336},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.336980402469635},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09696745872497559},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3521729","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3521729","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5899999737739563,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G1283858192","display_name":null,"funder_award_id":"U21B2043","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1508456709","display_name":null,"funder_award_id":"62102416","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":79,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2277195237","https://openalex.org/W2425121537","https://openalex.org/W2606982687","https://openalex.org/W2765716052","https://openalex.org/W2886641317","https://openalex.org/W2943885184","https://openalex.org/W2952132648","https://openalex.org/W2962945654","https://openalex.org/W2963017553","https://openalex.org/W2963341956","https://openalex.org/W3005881764","https://openalex.org/W3035635319","https://openalex.org/W3087975588","https://openalex.org/W3091588028","https://openalex.org/W3105232955","https://openalex.org/W3173223111","https://openalex.org/W3176641147","https://openalex.org/W3204588463","https://openalex.org/W3204670646","https://openalex.org/W3205021045","https://openalex.org/W3217340782","https://openalex.org/W4285606530","https://openalex.org/W4304014690","https://openalex.org/W4312299780","https://openalex.org/W4312463400","https://openalex.org/W4312884055","https://openalex.org/W4313136445","https://openalex.org/W4313186260","https://openalex.org/W4316661142","https://openalex.org/W4365385796","https://openalex.org/W4386071687","https://openalex.org/W4386076010","https://openalex.org/W4386076522","https://openalex.org/W4386076661","https://openalex.org/W4386083024","https://openalex.org/W4390871765","https://openalex.org/W4403511263","https://openalex.org/W4404003099","https://openalex.org/W6639432524","https://openalex.org/W6676497082","https://openalex.org/W6678262379","https://openalex.org/W6682631176","https://openalex.org/W6738045163","https://openalex.org/W6755207826","https://openalex.org/W6761014454","https://openalex.org/W6761197245","https://openalex.org/W6762122294","https://openalex.org/W6763228578","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6796581206","https://openalex.org/W6797109355","https://openalex.org/W6797148833","https://openalex.org/W6800139874","https://openalex.org/W6801013943","https://openalex.org/W6802744804","https://openalex.org/W6803545775","https://openalex.org/W6805349323","https://openalex.org/W6810042059","https://openalex.org/W6810334672","https://openalex.org/W6811013733","https://openalex.org/W6811072154","https://openalex.org/W6838434436","https://openalex.org/W6838701581","https://openalex.org/W6838756091","https://openalex.org/W6839015040","https://openalex.org/W6839670772","https://openalex.org/W6839948470","https://openalex.org/W6841325665","https://openalex.org/W6843018836","https://openalex.org/W6843263998","https://openalex.org/W6846313647","https://openalex.org/W6846867676","https://openalex.org/W6849177959","https://openalex.org/W6849307739","https://openalex.org/W6864544085","https://openalex.org/W6891781542","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952","https://openalex.org/W4366179611","https://openalex.org/W2996078371"],"abstract_inverted_index":{"Large-scale":[0],"image-text":[1],"contrastive":[2,124],"pre-training":[3,44,54,70],"models,":[4],"such":[5],"as":[6],"CLIP,":[7],"have":[8],"been":[9],"demonstrated":[10],"to":[11,68,108,121],"effectively":[12],"learn":[13],"high-quality":[14],"multimodal":[15,29,76,167],"representations.":[16],"However,":[17],"there":[18],"is":[19,87],"limited":[20],"research":[21],"on":[22,32,89,163,193],"learning":[23],"video-text":[24,43,83],"representations":[25,67],"for":[26,78],"general":[27],"video":[28,69,75,105,150,166,170,173,176,190],"tasks":[30,71],"based":[31],"these":[33],"powerful":[34],"features.":[35,151],"Towards":[36],"this":[37],"goal,":[38],"we":[39,101,131],"propose":[40,132],"a":[41,79,103],"novel":[42],"method":[45,136],"dubbed":[46],"VLAB:":[47],"<bold":[48,51,57,61],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[49,52,58,62],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">V</b>ideo":[50],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">L</b>anguage":[53],"by":[55,143],"feature":[56,93,96],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">A</b>dapting":[59],"and":[60,72,95,116,125,149,156,175,185,196,206],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">B</b>lending,":[63],"which":[64],"transfers":[65],"CLIP":[66],"develops":[73],"unified":[74],"models":[77],"wide":[80],"range":[81],"of":[82,147,158,203],"tasks.":[84,127],"Specifically,":[85],"VLAB":[86,159,180],"founded":[88],"two":[90],"key":[91],"strategies:":[92],"adapting":[94],"blending.":[97],"In":[98,128],"the":[99,118,129,140,145,154],"former,":[100],"introduce":[102],"new":[104,187],"adapter":[106],"module":[107],"address":[109],"CLIP's":[110],"deficiency":[111],"in":[112,189],"modeling":[113],"temporal":[114],"information":[115],"extend":[117],"model's":[119,141],"capability":[120],"encompass":[122],"both":[123],"generative":[126],"latter,":[130],"an":[133,201],"end-to-end":[134],"training":[135],"that":[137],"further":[138],"enhances":[139],"performance":[142],"exploiting":[144],"complementarity":[146],"image":[148],"We":[152],"validate":[153],"effectiveness":[155],"versatility":[157],"through":[160],"extensive":[161],"experiments":[162],"highly":[164],"competitive":[165],"tasks,":[168],"including":[169],"text":[171],"retrieval,":[172],"captioning,":[174],"question":[177,191],"answering.":[178],"Remarkably,":[179],"outperforms":[181],"competing":[182],"methods":[183],"significantly":[184],"sets":[186],"records":[188],"answering":[192],"MSRVTT,":[194],"MSVD,":[195],"TGIF":[197],"datasets.":[198],"It":[199],"achieves":[200],"accuracy":[202],"49.6,":[204],"60.9,":[205],"79.0,":[207],"respectively.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
