{"id":"https://openalex.org/W2886300652","doi":"https://doi.org/10.1145/3240508.3240578","title":"Emotion Recognition in Speech using Cross-Modal Transfer in the Wild","display_name":"Emotion Recognition in Speech using Cross-Modal Transfer in the Wild","publication_year":2018,"publication_date":"2018-10-15","ids":{"openalex":"https://openalex.org/W2886300652","doi":"https://doi.org/10.1145/3240508.3240578","mag":"2886300652"},"language":"en","primary_location":{"id":"doi:10.1145/3240508.3240578","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3240508.3240578","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240578","source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240578","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102018867","display_name":"Samuel Albanie","orcid":"https://orcid.org/0000-0003-1732-9198"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Samuel Albanie","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036002448","display_name":"Arsha Nagrani","orcid":"https://orcid.org/0000-0003-2190-9013"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Arsha Nagrani","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060511349","display_name":"Andrea Vedaldi","orcid":"https://orcid.org/0000-0003-1374-2858"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrea Vedaldi","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057678172","display_name":"Andrew Zisserman","orcid":"https://orcid.org/0000-0002-8945-8573"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew Zisserman","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102018867"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":null,"apc_paid":null,"fwci":32.4299,"has_fulltext":true,"cited_by_count":292,"citation_normalized_percentile":{"value":0.99808418,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"292","last_page":"301"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7703728675842285},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6813313961029053},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6424644589424133},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.516016960144043},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.501842737197876},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5002496242523193},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.4931773543357849},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48420074582099915},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.474881112575531},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.45021045207977295},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4281073212623596}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7703728675842285},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6813313961029053},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6424644589424133},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.516016960144043},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.501842737197876},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5002496242523193},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.4931773543357849},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48420074582099915},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.474881112575531},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.45021045207977295},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4281073212623596},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3240508.3240578","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3240508.3240578","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240578","source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3240508.3240578","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3240508.3240578","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3240508.3240578","source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM international conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7400000095367432,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1277543710","display_name":null,"funder_award_id":"EP/M013774/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G1484394898","display_name":"EPSRC Centre for Doctoral Training in Autonomous Intelligent Machines and Systems (AIMS)","funder_award_id":"EP/L015897/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G2018133609","display_name":"Seebibyte: Visual Search for the Era of Big Data","funder_award_id":"EP/M013774/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8416736120","display_name":null,"funder_award_id":"Seebibyte EP/M013774/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2886300652.pdf","grobid_xml":"https://content.openalex.org/works/W2886300652.grobid-xml"},"referenced_works_count":71,"referenced_works":["https://openalex.org/W100428360","https://openalex.org/W175750906","https://openalex.org/W343636949","https://openalex.org/W753847829","https://openalex.org/W1506765305","https://openalex.org/W1581984155","https://openalex.org/W1825415099","https://openalex.org/W1967021946","https://openalex.org/W1981207483","https://openalex.org/W1981918162","https://openalex.org/W1981950962","https://openalex.org/W2001540534","https://openalex.org/W2014405820","https://openalex.org/W2016839396","https://openalex.org/W2040008697","https://openalex.org/W2041616772","https://openalex.org/W2051282970","https://openalex.org/W2058322811","https://openalex.org/W2099215977","https://openalex.org/W2123260696","https://openalex.org/W2125462608","https://openalex.org/W2132752688","https://openalex.org/W2134797427","https://openalex.org/W2143350951","https://openalex.org/W2145765358","https://openalex.org/W2146334809","https://openalex.org/W2161411939","https://openalex.org/W2168692779","https://openalex.org/W2168860136","https://openalex.org/W2246249023","https://openalex.org/W2294370754","https://openalex.org/W2402040300","https://openalex.org/W2481681431","https://openalex.org/W2511428026","https://openalex.org/W2544224704","https://openalex.org/W2546649374","https://openalex.org/W2546702061","https://openalex.org/W2548529926","https://openalex.org/W2559655401","https://openalex.org/W2560662850","https://openalex.org/W2604379605","https://openalex.org/W2618574054","https://openalex.org/W2619697695","https://openalex.org/W2623327532","https://openalex.org/W2624340939","https://openalex.org/W2648194195","https://openalex.org/W2659927845","https://openalex.org/W2726515241","https://openalex.org/W2752782242","https://openalex.org/W2765354427","https://openalex.org/W2765860599","https://openalex.org/W2765998482","https://openalex.org/W2767087747","https://openalex.org/W2767476316","https://openalex.org/W2767618761","https://openalex.org/W2787524669","https://openalex.org/W2795986449","https://openalex.org/W2887997593","https://openalex.org/W2912889105","https://openalex.org/W2952881492","https://openalex.org/W2962918445","https://openalex.org/W2963173190","https://openalex.org/W2963420686","https://openalex.org/W2963645808","https://openalex.org/W2963703197","https://openalex.org/W2963801643","https://openalex.org/W2963839617","https://openalex.org/W2963887950","https://openalex.org/W4240035482","https://openalex.org/W6607193717","https://openalex.org/W6735927292"],"related_works":["https://openalex.org/W2353179089","https://openalex.org/W2378211422","https://openalex.org/W2923538289","https://openalex.org/W2353125546","https://openalex.org/W2470643824","https://openalex.org/W4400595174","https://openalex.org/W4321353415","https://openalex.org/W2584926856","https://openalex.org/W2075935902","https://openalex.org/W2014713986"],"abstract_inverted_index":{"Obtaining":[0],"large,":[1],"human":[2],"labelled":[3,43,142],"speech":[4,35,58,87,136,151,158],"datasets":[5],"to":[6,39,85,125,131,141],"train":[7,126],"models":[8,166],"for":[9,34,105,135,157],"emotion":[10,107,137,152,159],"recognition":[11,108,138,160],"is":[12],"a":[13,50,101,117,127],"notoriously":[14],"challenging":[15],"task,":[16],"hindered":[17],"by":[18],"annotation":[19],"cost":[20],"and":[21,145,167],"label":[22],"ambiguity.":[23],"In":[24],"this":[25,69],"work,":[26],"we":[27,71,99,121,147],"consider":[28],"the":[29,54,61,65,81,86,95,111,114,123,150],"task":[30],"of":[31,42,57,64,75,113],"learning":[32],"embeddings":[33],"classification":[36],"without":[37,139],"access":[38,140],"any":[40],"form":[41],"audio.":[44],"We":[45,93],"base":[46],"our":[47],"approach":[48],"on":[49,116,161],"simple":[51],"hypothesis:":[52],"that":[53,73,109,149],"emotional":[55],"content":[56],"correlates":[59],"with":[60],"facial":[62,106],"expression":[63,76],"speaker.":[66],"By":[67],"exploiting":[68],"relationship,":[70],"show":[72,148],"annotations":[74],"can":[77,154],"be":[78,155],"transferred":[79],"from":[80],"visual":[82],"domain":[83,88],"(faces)":[84],"(voices)":[89],"through":[90],"cross-modal":[91],"distillation.":[92],"make":[94],"following":[96],"contributions:":[97],"(i)":[98],"develop":[100],"strong":[102],"teacher":[103,124],"network":[104],"achieves":[110],"state":[112],"art":[115],"standard":[118],"benchmark;":[119],"(ii)":[120],"use":[122],"student,":[128],"tabula":[129],"rasa,":[130],"learn":[132],"representations":[133],"(embeddings)":[134],"audio":[143],"data;":[144],"(iii)":[146],"embedding":[153],"used":[156],"external":[162],"benchmark":[163],"datasets.":[164],"Code,":[165],"data":[168],"are":[169],"available.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":30},{"year":2023,"cited_by_count":47},{"year":2022,"cited_by_count":46},{"year":2021,"cited_by_count":61},{"year":2020,"cited_by_count":60},{"year":2019,"cited_by_count":26},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
