{"id":"https://openalex.org/W4375869474","doi":"https://doi.org/10.1109/icassp49357.2023.10094821","title":"C2KD: Cross-Lingual Cross-Modal Knowledge Distillation for Multilingual Text-Video Retrieval","display_name":"C2KD: Cross-Lingual Cross-Modal Knowledge Distillation for Multilingual Text-Video Retrieval","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869474","doi":"https://doi.org/10.1109/icassp49357.2023.10094821"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10094821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5010091252","display_name":"Andrew Rouditchenko","orcid":"https://orcid.org/0000-0002-0063-3612"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":true,"raw_author_name":"Andrew Rouditchenko","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058729228","display_name":"Yung-Sung Chuang","orcid":"https://orcid.org/0000-0002-1723-5063"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Yung-Sung Chuang","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026760560","display_name":"Nina Shvetsova","orcid":"https://orcid.org/0009-0004-9848-3238"},"institutions":[{"id":"https://openalex.org/I114090438","display_name":"Goethe University Frankfurt","ror":"https://ror.org/04cvxnb49","country_code":"DE","type":"education","lineage":["https://openalex.org/I114090438"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Nina Shvetsova","raw_affiliation_strings":["Goethe University Frankfurt"],"affiliations":[{"raw_affiliation_string":"Goethe University Frankfurt","institution_ids":["https://openalex.org/I114090438"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101787514","display_name":"Samuel Thomas","orcid":"https://orcid.org/0000-0001-7573-0620"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samuel Thomas","raw_affiliation_strings":["IBM Research AI","MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]},{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052325109","display_name":"Rog\u00e9rio Feris","orcid":"https://orcid.org/0000-0001-6399-0679"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rogerio Feris","raw_affiliation_strings":["IBM Research AI","MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]},{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003725957","display_name":"Brian Kingsbury","orcid":"https://orcid.org/0000-0002-1343-6837"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Kingsbury","raw_affiliation_strings":["IBM Research AI","MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]},{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020676344","display_name":"Leonid Karlinsky","orcid":"https://orcid.org/0000-0003-2524-2068"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leonid Karlinsky","raw_affiliation_strings":["IBM Research AI","MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"IBM Research AI","institution_ids":[]},{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004717608","display_name":"David Harwath","orcid":"https://orcid.org/0000-0003-0206-0253"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Harwath","raw_affiliation_strings":["UT Austin"],"affiliations":[{"raw_affiliation_string":"UT Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011795407","display_name":"Hilde Kuehne","orcid":"https://orcid.org/0000-0003-1079-4441"},"institutions":[{"id":"https://openalex.org/I114090438","display_name":"Goethe University Frankfurt","ror":"https://ror.org/04cvxnb49","country_code":"DE","type":"education","lineage":["https://openalex.org/I114090438"]},{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["DE","US"],"is_corresponding":false,"raw_author_name":"Hilde Kuehne","raw_affiliation_strings":["Goethe University Frankfurt","MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"Goethe University Frankfurt","institution_ids":["https://openalex.org/I114090438"]},{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112758056","display_name":"James Glass","orcid":"https://orcid.org/0000-0002-3097-360X"},"institutions":[{"id":"https://openalex.org/I4210109586","display_name":"Moscow Institute of Thermal Technology","ror":"https://ror.org/021es5e59","country_code":"RU","type":"facility","lineage":["https://openalex.org/I4210109586"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"James Glass","raw_affiliation_strings":["MIT"],"affiliations":[{"raw_affiliation_string":"MIT","institution_ids":["https://openalex.org/I4210109586"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5010091252"],"corresponding_institution_ids":["https://openalex.org/I4210109586"],"apc_list":null,"apc_paid":null,"fwci":0.7379,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.71846295,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8571017384529114},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5966360569000244},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5660363435745239},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5555291771888733},{"id":"https://openalex.org/keywords/text-retrieval","display_name":"Text retrieval","score":0.5325120091438293},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.500391960144043},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4995384216308594},{"id":"https://openalex.org/keywords/video-retrieval","display_name":"Video retrieval","score":0.47656893730163574},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.1358354389667511}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8571017384529114},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5966360569000244},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5660363435745239},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5555291771888733},{"id":"https://openalex.org/C2985933255","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Text retrieval","level":2,"score":0.5325120091438293},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.500391960144043},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4995384216308594},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.47656893730163574},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.1358354389667511},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10094821","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10094821","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8799999952316284}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2259472270","https://openalex.org/W2425121537","https://openalex.org/W2619947201","https://openalex.org/W2842511635","https://openalex.org/W2883672905","https://openalex.org/W2952132648","https://openalex.org/W2970641574","https://openalex.org/W2975357369","https://openalex.org/W2978223337","https://openalex.org/W2984008963","https://openalex.org/W2989322838","https://openalex.org/W3035390927","https://openalex.org/W3039695075","https://openalex.org/W3100806282","https://openalex.org/W3106525532","https://openalex.org/W3153005511","https://openalex.org/W3156636935","https://openalex.org/W3171927989","https://openalex.org/W3197828817","https://openalex.org/W3206628126","https://openalex.org/W4297808394","https://openalex.org/W4385245566","https://openalex.org/W6638523607","https://openalex.org/W6682948231","https://openalex.org/W6692563993","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6767853649","https://openalex.org/W6791353385","https://openalex.org/W6955071965"],"related_works":["https://openalex.org/W4386640819","https://openalex.org/W2347420427","https://openalex.org/W1563547568","https://openalex.org/W2157187677","https://openalex.org/W1873761914","https://openalex.org/W4385643649","https://openalex.org/W4384919675","https://openalex.org/W1516986247","https://openalex.org/W4312122622","https://openalex.org/W1498836399"],"abstract_inverted_index":{"Multilingual":[0],"text-video":[1,31,39,82,120],"retrieval":[2,40,121],"methods":[3],"have":[4],"improved":[5],"significantly":[6],"in":[7,52,66,107],"recent":[8],"years,":[9],"but":[10],"the":[11,35,57,77,80,91,104,108,140],"performance":[12,122],"for":[13],"languages":[14,54],"other":[15,42,114,127],"than":[16],"English":[17,38,105],"still":[18],"lags.":[19],"We":[20,68,94,134],"propose":[21,69],"a":[22,46,70,96],"Cross-Lingual":[23],"Cross-Modal":[24],"Knowledge":[25],"Distillation":[26],"method":[27,117],"to":[28,55,85,88,112],"improve":[29],"multilingual":[30,98,119,144],"retrieval.":[32],"Inspired":[33],"by":[34,102],"fact":[36],"that":[37],"outperforms":[41],"languages,":[43],"we":[44],"train":[45],"student":[47],"model":[48],"using":[49,63],"input":[50,64],"text":[51,65,145],"different":[53,143],"match":[56],"cross-modal":[58],"predictions":[59],"from":[60],"teacher":[61,92],"models":[62,146],"English.":[67],"cross":[71],"entropy":[72],"based":[73],"objective":[74],"which":[75],"forces":[76],"distribution":[78],"over":[79],"student\u2019s":[81],"similarity":[83],"scores":[84],"be":[86],"similar":[87],"those":[89],"of":[90,142],"models.":[93],"introduce":[95],"new":[97],"video":[99,110],"dataset,":[100],"Multi-YouCook2,":[101],"translating":[103],"captions":[106],"YouCook2":[109],"dataset":[111],"8":[113],"languages.":[115],"Our":[116],"improves":[118],"on":[123,139],"Multi-YouCook2":[124],"and":[125,132],"several":[126],"datasets":[128],"such":[129],"as":[130,147],"Multi-MSRVTT":[131],"VATEX.":[133],"also":[135],"conducted":[136],"an":[137],"analysis":[138],"effectiveness":[141],"teachers.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
