{"id":"https://openalex.org/W4402716170","doi":"https://doi.org/10.1109/cvpr52733.2024.01245","title":"Distilling Vision-Language Models on Millions of Videos","display_name":"Distilling Vision-Language Models on Millions of Videos","publication_year":2024,"publication_date":"2024-06-16","ids":{"openalex":"https://openalex.org/W4402716170","doi":"https://doi.org/10.1109/cvpr52733.2024.01245"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52733.2024.01245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.01245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107951791","display_name":"Yue Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yue Zhao","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101850043","display_name":"L. Zhao","orcid":"https://orcid.org/0000-0001-8921-8564"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Long Zhao","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103170520","display_name":"Xingyi Zhou","orcid":"https://orcid.org/0000-0002-0914-8525"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xingyi Zhou","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101825497","display_name":"Jialin Wu","orcid":"https://orcid.org/0000-0003-4684-5212"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jialin Wu","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083941504","display_name":"Chun-Te Chu","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chun-Te Chu","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101716735","display_name":"Hui Miao","orcid":"https://orcid.org/0000-0002-9839-3249"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hui Miao","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048629871","display_name":"Florian Schroff","orcid":"https://orcid.org/0000-0003-0570-8967"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Florian Schroff","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030402556","display_name":"Hartwig Adam","orcid":"https://orcid.org/0000-0003-1258-4341"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hartwig Adam","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418161","display_name":"Ting Liu","orcid":"https://orcid.org/0000-0003-0749-064X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ting Liu","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017319429","display_name":"Boqing Gong","orcid":"https://orcid.org/0000-0003-3915-5977"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boqing Gong","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057535281","display_name":"Philipp Kr\u00e4henb\u00fchl","orcid":"https://orcid.org/0000-0002-9846-4369"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Philipp Kr\u00e4henb\u00fchl","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008194726","display_name":"Liangzhe Yuan","orcid":"https://orcid.org/0000-0001-9206-1908"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liangzhe Yuan","raw_affiliation_strings":["Google Research"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Research","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5107951791"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":2.326,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.89828195,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"13106","last_page":"13116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.729477047920227},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4441757798194885},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3629681169986725},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.36068782210350037},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3399151563644409}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.729477047920227},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4441757798194885},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3629681169986725},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.36068782210350037},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3399151563644409}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52733.2024.01245","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52733.2024.01245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.49000000953674316,"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger"}],"awards":[{"id":"https://openalex.org/G4121626586","display_name":null,"funder_award_id":"IIS-1845485","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":106,"referenced_works":["https://openalex.org/W764651262","https://openalex.org/W1861492603","https://openalex.org/W2194775991","https://openalex.org/W2425121537","https://openalex.org/W2487365028","https://openalex.org/W2560730294","https://openalex.org/W2765716052","https://openalex.org/W2851061873","https://openalex.org/W2886641317","https://openalex.org/W2938704169","https://openalex.org/W2948859046","https://openalex.org/W2962784628","https://openalex.org/W2963250244","https://openalex.org/W2963524571","https://openalex.org/W2963916161","https://openalex.org/W2964220823","https://openalex.org/W2978426779","https://openalex.org/W2980037812","https://openalex.org/W2989322838","https://openalex.org/W3001197829","https://openalex.org/W3034630387","https://openalex.org/W3094502228","https://openalex.org/W3108241103","https://openalex.org/W3126337491","https://openalex.org/W3158120491","https://openalex.org/W3166396011","https://openalex.org/W3170972077","https://openalex.org/W3172942063","https://openalex.org/W3175961224","https://openalex.org/W3203711169","https://openalex.org/W3204588463","https://openalex.org/W3205786327","https://openalex.org/W3213454282","https://openalex.org/W3215626407","https://openalex.org/W4200579548","https://openalex.org/W4205991051","https://openalex.org/W4221149883","https://openalex.org/W4225323055","https://openalex.org/W4247726808","https://openalex.org/W4283066680","https://openalex.org/W4285606530","https://openalex.org/W4286987939","https://openalex.org/W4292779060","https://openalex.org/W4296406182","https://openalex.org/W4297943387","https://openalex.org/W4306820534","https://openalex.org/W4310561894","https://openalex.org/W4310921506","https://openalex.org/W4312271977","https://openalex.org/W4312424618","https://openalex.org/W4312864639","https://openalex.org/W4313190371","https://openalex.org/W4318718936","https://openalex.org/W4319453767","https://openalex.org/W4322718191","https://openalex.org/W4323570346","https://openalex.org/W4364382845","https://openalex.org/W4366330503","https://openalex.org/W4376167553","https://openalex.org/W4384392955","https://openalex.org/W4385017974","https://openalex.org/W4385572634","https://openalex.org/W4386066095","https://openalex.org/W4386071687","https://openalex.org/W4386076314","https://openalex.org/W4387226763","https://openalex.org/W4387688013","https://openalex.org/W4390874575","https://openalex.org/W4394671432","https://openalex.org/W4402671548","https://openalex.org/W4402727516","https://openalex.org/W6687484953","https://openalex.org/W6761551260","https://openalex.org/W6762913911","https://openalex.org/W6763228578","https://openalex.org/W6769627184","https://openalex.org/W6770332682","https://openalex.org/W6773005947","https://openalex.org/W6778883912","https://openalex.org/W6784333009","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6800875267","https://openalex.org/W6803872405","https://openalex.org/W6804443999","https://openalex.org/W6809361247","https://openalex.org/W6810334672","https://openalex.org/W6811072154","https://openalex.org/W6838434436","https://openalex.org/W6842585177","https://openalex.org/W6843071136","https://openalex.org/W6846007759","https://openalex.org/W6846867676","https://openalex.org/W6847386241","https://openalex.org/W6849177959","https://openalex.org/W6849878650","https://openalex.org/W6850625674","https://openalex.org/W6851149231","https://openalex.org/W6851592950","https://openalex.org/W6851744038","https://openalex.org/W6852776751","https://openalex.org/W6853515732","https://openalex.org/W6854451932","https://openalex.org/W6854992507","https://openalex.org/W6857703410","https://openalex.org/W6858147810"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2775347418","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0,51],"recent":[1],"advance":[2],"in":[3],"vision-language":[4,139],"models":[5],"is":[6,27,58,129],"largely":[7],"attributed":[8],"to":[9,17,37,61,66,165],"the":[10,72,89,133,160],"abundance":[11],"of":[12,64,82],"image-text":[13],"data.":[14,50],"We":[15,34,70],"aim":[16],"replicate":[18],"this":[19],"success":[20],"for":[21,104],"video-language":[22,40,74,83,120],"models,":[23],"but":[24],"there":[25],"simply":[26],"not":[28],"enough":[29],"human-curated":[30],"video-text":[31],"data":[32],"available.":[33],"thus":[35],"resort":[36],"fine-tuning":[38],"a":[39,43,79,119,155],"model":[41,54,75,100,122,143],"from":[42],"strong":[44],"image-language":[45],"baseline":[46,135],"with":[47],"syn-thesized":[48],"instructional":[49],"resulting":[52],"video":[53,162],"by":[55,96,152],"video-instruction-tuning":[56],"(VIIT)":[57],"then":[59],"used":[60],"auto-label":[62],"millions":[63],"videos":[65],"generate":[67,159],"high-quality":[68],"captions.":[69],"show":[71,117],"adapted":[73],"performs":[76],"well":[77],"on":[78,93,125,147],"wide":[80],"range":[81],"benchmarks.":[84],"For":[85],"instance,":[86],"it":[87],"surpasses":[88],"best":[90,142],"prior":[91],"result":[92],"open-ended":[94],"NExT-QA":[95],"2.8%.":[97],"Besides,":[98],"our":[99],"generates":[101],"detailed":[102],"descriptions":[103],"previously":[105],"unseen":[106],"videos,":[107],"which":[108],"provide":[109],"better":[110,131],"textual":[111],"supervision":[112],"than":[113,132],"existing":[114],"methods.":[115],"Experiments":[116],"that":[118,136],"dual-encoder":[121],"contrastively":[123],"trained":[124],"these":[126],"auto-generated":[127],"captions":[128],"3.8%":[130],"strongest":[134],"also":[137],"leverages":[138],"models.":[140],"Our":[141],"outperforms":[144],"state-of-the-art":[145],"methods":[146],"MSR-VTT":[148],"zero-shot":[149],"text-to-video":[150],"retrieval":[151],"6%.":[153],"As":[154],"side":[156],"product,":[157],"we":[158],"largest":[161],"capation":[163],"dataset":[164],"date.":[166]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
