{"id":"https://openalex.org/W2981036708","doi":"https://doi.org/10.1109/wacv45572.2020.9093438","title":"Coordinated Joint Multimodal Embeddings for Generalized Audio-Visual Zero-shot Classification and Retrieval of Videos","display_name":"Coordinated Joint Multimodal Embeddings for Generalized Audio-Visual Zero-shot Classification and Retrieval of Videos","publication_year":2020,"publication_date":"2020-03-01","ids":{"openalex":"https://openalex.org/W2981036708","doi":"https://doi.org/10.1109/wacv45572.2020.9093438","mag":"2981036708"},"language":"en","primary_location":{"id":"doi:10.1109/wacv45572.2020.9093438","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv45572.2020.9093438","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1910.08732","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041837701","display_name":"Kranti Kumar Parida","orcid":"https://orcid.org/0000-0001-5454-9144"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Kranti Kumar Parida","raw_affiliation_strings":["IIT Kanpur"],"affiliations":[{"raw_affiliation_string":"IIT Kanpur","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034793449","display_name":"Neeraj Matiyali","orcid":"https://orcid.org/0000-0003-4417-7820"},"institutions":[{"id":"https://openalex.org/I94234084","display_name":"Indian Institute of Technology Kanpur","ror":"https://ror.org/05pjsgx75","country_code":"IN","type":"education","lineage":["https://openalex.org/I94234084"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Neeraj Matiyali","raw_affiliation_strings":["IIT Kanpur"],"affiliations":[{"raw_affiliation_string":"IIT Kanpur","institution_ids":["https://openalex.org/I94234084"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021354054","display_name":"Tanaya Guha","orcid":"https://orcid.org/0000-0003-2167-4891"},"institutions":[{"id":"https://openalex.org/I39555362","display_name":"University of Warwick","ror":"https://ror.org/01a77tt86","country_code":"GB","type":"education","lineage":["https://openalex.org/I39555362"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Tanaya Guha","raw_affiliation_strings":["University of Warwick"],"affiliations":[{"raw_affiliation_string":"University of Warwick","institution_ids":["https://openalex.org/I39555362"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100705959","display_name":"Gaurav Sharma","orcid":"https://orcid.org/0000-0001-9735-9519"},"institutions":[{"id":"https://openalex.org/I4210107353","display_name":"NEC (United States)","ror":"https://ror.org/01v791m31","country_code":"US","type":"company","lineage":["https://openalex.org/I118347220","https://openalex.org/I4210107353"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gaurav Sharma","raw_affiliation_strings":["NEC Labs America"],"affiliations":[{"raw_affiliation_string":"NEC Labs America","institution_ids":["https://openalex.org/I4210107353"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5041837701"],"corresponding_institution_ids":["https://openalex.org/I94234084"],"apc_list":null,"apc_paid":null,"fwci":0.3048,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.50768828,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"3240","last_page":"3249"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9656999707221985,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8102090358734131},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7862533330917358},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6594995856285095},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6392371654510498},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6357227563858032},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6227268576622009},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6153244376182556},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5798560976982117},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.512925386428833},{"id":"https://openalex.org/keywords/multimodal-learning","display_name":"Multimodal learning","score":0.4607987701892853},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43746721744537354},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.33844494819641113},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33717286586761475},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3239540159702301},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.07258269190788269}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8102090358734131},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7862533330917358},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6594995856285095},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6392371654510498},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6357227563858032},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6227268576622009},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6153244376182556},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5798560976982117},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.512925386428833},{"id":"https://openalex.org/C2780660688","wikidata":"https://www.wikidata.org/wiki/Q25052564","display_name":"Multimodal learning","level":2,"score":0.4607987701892853},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43746721744537354},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.33844494819641113},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33717286586761475},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3239540159702301},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.07258269190788269},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/wacv45572.2020.9093438","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv45572.2020.9093438","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},{"id":"pmh:oai:wrap.warwick.ac.uk:129917","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400665","display_name":"Warwick Research Archive Portal (University of Warwick)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I39555362","host_organization_name":"University of Warwick","host_organization_lineage":["https://openalex.org/I39555362"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference Item"},{"id":"pmh:oai:arXiv.org:1910.08732","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.08732","pdf_url":"https://arxiv.org/pdf/1910.08732","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2981036708","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/1910.08732","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1910.08732","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1910.08732","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1910.08732","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.08732","pdf_url":"https://arxiv.org/pdf/1910.08732","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5899999737739563}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W93016980","https://openalex.org/W652269744","https://openalex.org/W1975077471","https://openalex.org/W1982795953","https://openalex.org/W2099471712","https://openalex.org/W2105582566","https://openalex.org/W2124033848","https://openalex.org/W2128532956","https://openalex.org/W2171061940","https://openalex.org/W2334493732","https://openalex.org/W2400717490","https://openalex.org/W2511428026","https://openalex.org/W2520613337","https://openalex.org/W2593116425","https://openalex.org/W2611632661","https://openalex.org/W2619697695","https://openalex.org/W2740825418","https://openalex.org/W2789366140","https://openalex.org/W2885402090","https://openalex.org/W2921950349","https://openalex.org/W2949999304","https://openalex.org/W2962756039","https://openalex.org/W2962772361","https://openalex.org/W2962865004","https://openalex.org/W2962910554","https://openalex.org/W2962945761","https://openalex.org/W2962960500","https://openalex.org/W2963115079","https://openalex.org/W2963218389","https://openalex.org/W2963499153","https://openalex.org/W2963524571","https://openalex.org/W2963545832","https://openalex.org/W2963680395","https://openalex.org/W2963689837","https://openalex.org/W2963854535","https://openalex.org/W2963887950","https://openalex.org/W2963936013","https://openalex.org/W2963960318","https://openalex.org/W2964307109","https://openalex.org/W2978987836","https://openalex.org/W2997685131","https://openalex.org/W3100093508","https://openalex.org/W3123318516","https://openalex.org/W4237951138","https://openalex.org/W4289665794","https://openalex.org/W6603820874","https://openalex.org/W6678360021","https://openalex.org/W6678470764","https://openalex.org/W6713645886","https://openalex.org/W6725104640","https://openalex.org/W6729831399","https://openalex.org/W6740580305","https://openalex.org/W6746240808","https://openalex.org/W6747248625","https://openalex.org/W6749149297","https://openalex.org/W6749179674","https://openalex.org/W6756219837","https://openalex.org/W6784846827"],"related_works":["https://openalex.org/W2036931824","https://openalex.org/W2255887839","https://openalex.org/W2910905530","https://openalex.org/W3013561077","https://openalex.org/W2047889412","https://openalex.org/W2808399042","https://openalex.org/W2909206463","https://openalex.org/W3100177202","https://openalex.org/W3089044119","https://openalex.org/W3123146796","https://openalex.org/W2964325095","https://openalex.org/W3210278546","https://openalex.org/W1982795953","https://openalex.org/W2547401708","https://openalex.org/W2952801746","https://openalex.org/W2885641986","https://openalex.org/W2944453634","https://openalex.org/W2613744336","https://openalex.org/W2083958690","https://openalex.org/W2562291440"],"abstract_inverted_index":{"We":[0,39,86,113,131,151],"present":[1],"an":[2,67,79],"audio-visual":[3],"multimodal":[4,69],"approach":[5],"for":[6,13,49,51,97,148],"the":[7,25,58,90,121,133,149,156],"task":[8,59],"of":[9,17,100,109,155],"zero-shot":[10,101],"learning":[11,111],"(ZSL)":[12],"classification":[14,102],"and":[15,36,44,103,139],"retrieval":[16],"videos.":[18,52],"ZSL":[19,50],"has":[20,29],"been":[21,31],"studied":[22],"extensively":[23],"in":[24,135],"recent":[26],"past":[27],"but":[28],"primarily":[30],"limited":[32],"to":[33,37,56,119,164],"visual":[34,45],"modality":[35,96,123,128,157],"images.":[38],"demonstrate":[40],"that":[41,89],"both":[42,98],"audio":[43,83,95],"modalities":[46],"are":[47],"important":[48],"Since":[53],"a":[54,116,125,136],"dataset":[55,70],"study":[57],"is":[60],"currently":[61],"not":[62,142],"available,":[63],"we":[64],"also":[65,114,161],"construct":[66],"appropriate":[68],"with":[71],"33":[72],"classes":[73],"containing":[74],"156,":[75],"416":[76],"videos,":[77],"from":[78],"existing":[80],"large":[81],"scale":[82],"event":[84],"dataset.":[85],"empirically":[87],"show":[88],"performance":[91],"improves":[92],"by":[93],"adding":[94],"tasks":[99],"retrieval,":[104],"when":[105],"using":[106,124],"multi-modal":[107],"extensions":[108],"embedding":[110],"methods.":[112],"propose":[115],"novel":[117],"method":[118],"predict":[120],"`dominant'":[122],"jointly":[126],"learned":[127],"attention":[129,134],"network.":[130],"learn":[132],"semi-supervised":[137],"setting":[138],"thus":[140],"do":[141],"require":[143],"any":[144],"additional":[145],"explicit":[146],"labelling":[147],"modalities.":[150],"provide":[152],"qualitative":[153],"validation":[154],"specific":[158],"attention,":[159],"which":[160],"successfully":[162],"generalizes":[163],"unseen":[165],"test":[166],"classes.":[167]},"counts_by_year":[{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
