{"id":"https://openalex.org/W4225713393","doi":"https://doi.org/10.1109/icassp43922.2022.9746790","title":"Towards Learning Universal Audio Representations","display_name":"Towards Learning Universal Audio Representations","publication_year":2022,"publication_date":"2022-04-27","ids":{"openalex":"https://openalex.org/W4225713393","doi":"https://doi.org/10.1109/icassp43922.2022.9746790"},"language":"en","primary_location":{"id":"doi:10.1109/icassp43922.2022.9746790","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746790","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101524928","display_name":"Luyu Wang","orcid":"https://orcid.org/0000-0003-2882-3503"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Luyu Wang","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030447052","display_name":"Pauline Luc","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pauline Luc","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101882347","display_name":"Yan Wu","orcid":"https://orcid.org/0000-0003-1337-7678"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yan Wu","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020406406","display_name":"Adri\u00e0 Recasens","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Adri\u00e0 Recasens","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061684299","display_name":"Lucas Smaira","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Lucas Smaira","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006805259","display_name":"Andrew Brock","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew Brock","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051121535","display_name":"Andrew Jaegle","orcid":"https://orcid.org/0000-0003-1698-9901"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew Jaegle","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012826868","display_name":"Jean-Baptiste Alayrac","orcid":"https://orcid.org/0000-0002-3071-4157"},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jean-Baptiste Alayrac","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034960578","display_name":"Sander Dieleman","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sander Dieleman","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057909195","display_name":"Jo\u00e3o Carreira","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joao Carreira","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039776447","display_name":"A\u00e4ron van den Oord","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"Google DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"A\u00e4ron van den Oord","raw_affiliation_strings":["DeepMind,London,UK","DeepMind, London, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DeepMind,London,UK","institution_ids":["https://openalex.org/I4210090411"]},{"raw_affiliation_string":"DeepMind, London, UK","institution_ids":["https://openalex.org/I4210090411"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.0836,"has_fulltext":false,"cited_by_count":43,"citation_normalized_percentile":{"value":0.96883853,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4593","last_page":"4597"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7918661832809448},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6708307266235352},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5770231485366821},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5461616516113281},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5041965246200562},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.47465020418167114},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.47455841302871704},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3675021529197693},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3411470055580139}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7918661832809448},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6708307266235352},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5770231485366821},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5461616516113281},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5041965246200562},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.47465020418167114},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.47455841302871704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3675021529197693},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3411470055580139},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp43922.2022.9746790","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp43922.2022.9746790","pdf_url":null,"source":{"id":"https://openalex.org/S4363607702","display_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W1836465849","https://openalex.org/W2052666245","https://openalex.org/W2127870748","https://openalex.org/W2502312327","https://openalex.org/W2593116425","https://openalex.org/W2606176153","https://openalex.org/W2767754137","https://openalex.org/W2842511635","https://openalex.org/W2923014074","https://openalex.org/W2972584841","https://openalex.org/W2973049979","https://openalex.org/W3006926732","https://openalex.org/W3015949486","https://openalex.org/W3036601975","https://openalex.org/W3094550259","https://openalex.org/W3095727342","https://openalex.org/W3128633047","https://openalex.org/W3132672614","https://openalex.org/W3134486096","https://openalex.org/W3139211892","https://openalex.org/W3157916917","https://openalex.org/W3162391496","https://openalex.org/W3163937874","https://openalex.org/W3196974791","https://openalex.org/W3197580070","https://openalex.org/W3201143670","https://openalex.org/W4297808394","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6678969435","https://openalex.org/W6724804524","https://openalex.org/W6736723571","https://openalex.org/W6771438384","https://openalex.org/W6780218876","https://openalex.org/W6780226713","https://openalex.org/W6784077883","https://openalex.org/W6788627230","https://openalex.org/W6790428460","https://openalex.org/W6791429434","https://openalex.org/W6791537541","https://openalex.org/W6793728465"],"related_works":["https://openalex.org/W4379115841","https://openalex.org/W2083794993","https://openalex.org/W1511772879","https://openalex.org/W4394660363","https://openalex.org/W2186315912","https://openalex.org/W2588591308","https://openalex.org/W2248125223","https://openalex.org/W2910542634","https://openalex.org/W2127898439","https://openalex.org/W3082894236"],"abstract_inverted_index":{"The":[0],"ability":[1],"to":[2],"learn":[3],"universal":[4],"audio":[5,33,43,82],"representations":[6,83,111],"that":[7,20,58,62,79],"can":[8,16,84],"solve":[9],"diverse":[10],"speech,":[11],"music,":[12],"and":[13,45,136],"environment":[14],"tasks":[15,41],"spur":[17],"many":[18],"applications":[19],"require":[21],"general":[22],"sound":[23,53,64],"content":[24],"understanding.":[25],"In":[26],"this":[27],"work,":[28],"we":[29,129],"introduce":[30],"a":[31,47,131],"holistic":[32],"representation":[34,54],"evaluation":[35],"suite":[36],"(HARES)":[37],"spanning":[38],"12":[39],"downstream":[40],"across":[42,140],"domains":[44],"provide":[46],"thorough":[48],"empirical":[49],"study":[50],"of":[51,74],"recent":[52],"learning":[55,109],"systems":[56],"on":[57,97,126],"benchmark.":[59],"We":[60,77,101],"discover":[61],"previous":[63],"event":[65],"classification":[66],"or":[67],"speech":[68],"models":[69],"do":[70],"not":[71],"generalize":[72],"outside":[73],"their":[75],"domains.":[76,142],"observe":[78],"more":[80],"robust":[81],"be":[85],"learned":[86],"with":[87],"the":[88,92,98,103,122],"SimCLR":[89],"objective;":[90],"however,":[91],"model\u2019s":[93],"transferability":[94],"depends":[95],"heavily":[96],"model":[99],"architecture.":[100],"find":[102],"Slowfast":[104,134],"architecture":[105],"is":[106,119],"good":[107],"at":[108],"rich":[110],"required":[112],"by":[113,121],"different":[114],"domains,":[115],"but":[116],"its":[117],"performance":[118,139],"affected":[120],"normalization":[123],"scheme.":[124],"Based":[125],"these":[127],"findings,":[128],"propose":[130],"novel":[132],"normalizer-free":[133],"NFNet":[135],"achieve":[137],"state-of-the-art":[138],"all":[141]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":10}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
