{"id":"https://openalex.org/W3135828102","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534474","title":"BYOL for Audio: Self-Supervised Learning for General-Purpose Audio Representation","display_name":"BYOL for Audio: Self-Supervised Learning for General-Purpose Audio Representation","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3135828102","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534474","mag":"3135828102"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9534474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2103.06695","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091219538","display_name":"Daisuke Niizumi","orcid":"https://orcid.org/0000-0002-5063-0508"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Daisuke Niizumi","raw_affiliation_strings":["NTT Corporation,Japan"],"affiliations":[{"raw_affiliation_string":"NTT Corporation,Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103978078","display_name":"Daiki Takeuchi","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Daiki Takeuchi","raw_affiliation_strings":["NTT Corporation,Japan"],"affiliations":[{"raw_affiliation_string":"NTT Corporation,Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062509967","display_name":"Yasunori Ohishi","orcid":"https://orcid.org/0000-0002-7856-248X"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yasunori Ohishi","raw_affiliation_strings":["NTT Corporation,Japan"],"affiliations":[{"raw_affiliation_string":"NTT Corporation,Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054467679","display_name":"Noboru Harada","orcid":"https://orcid.org/0000-0002-1759-4533"},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Noboru Harada","raw_affiliation_strings":["NTT Corporation,Japan"],"affiliations":[{"raw_affiliation_string":"NTT Corporation,Japan","institution_ids":["https://openalex.org/I2251713219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061465935","display_name":"Kunio Kashino","orcid":null},"institutions":[{"id":"https://openalex.org/I2251713219","display_name":"NTT (Japan)","ror":"https://ror.org/00berct97","country_code":"JP","type":"company","lineage":["https://openalex.org/I2251713219"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kunio Kashino","raw_affiliation_strings":["NTT Corporation,Japan"],"affiliations":[{"raw_affiliation_string":"NTT Corporation,Japan","institution_ids":["https://openalex.org/I2251713219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5091219538"],"corresponding_institution_ids":["https://openalex.org/I2251713219"],"apc_list":null,"apc_paid":null,"fwci":2.0007,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.86806255,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7381593585014343},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6763166189193726},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5625046491622925},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5603245496749878},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47656917572021484},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.44185999035835266},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4404314160346985},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3575381636619568},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.22429513931274414}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7381593585014343},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6763166189193726},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5625046491622925},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5603245496749878},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47656917572021484},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.44185999035835266},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4404314160346985},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3575381636619568},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.22429513931274414},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9534474","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534474","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2103.06695","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.06695","pdf_url":"https://arxiv.org/pdf/2103.06695","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2103.06695","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2103.06695","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3135828102","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2103.06695","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.06695","pdf_url":"https://arxiv.org/pdf/2103.06695","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3135828102.pdf","grobid_xml":"https://content.openalex.org/works/W3135828102.grobid-xml"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W1836465849","https://openalex.org/W2038484192","https://openalex.org/W2165130450","https://openalex.org/W2194775991","https://openalex.org/W2507109542","https://openalex.org/W2593116425","https://openalex.org/W2606176153","https://openalex.org/W2726515241","https://openalex.org/W2767754137","https://openalex.org/W2797583228","https://openalex.org/W2842511635","https://openalex.org/W2888469011","https://openalex.org/W2939574508","https://openalex.org/W2949676527","https://openalex.org/W2955425717","https://openalex.org/W2963399829","https://openalex.org/W3006926732","https://openalex.org/W3009561768","https://openalex.org/W3015213852","https://openalex.org/W3034978746","https://openalex.org/W3035312337","https://openalex.org/W3035524453","https://openalex.org/W3038899388","https://openalex.org/W3088092535","https://openalex.org/W3090388844","https://openalex.org/W3092359270","https://openalex.org/W3093563057","https://openalex.org/W3094502228","https://openalex.org/W3096655658","https://openalex.org/W3098903812","https://openalex.org/W3099306795","https://openalex.org/W3101821705","https://openalex.org/W3104805957","https://openalex.org/W3130223764","https://openalex.org/W3136810184","https://openalex.org/W3141023492","https://openalex.org/W3173151551","https://openalex.org/W4252814261","https://openalex.org/W6638667902","https://openalex.org/W6734260513","https://openalex.org/W6736723571","https://openalex.org/W6740167877","https://openalex.org/W6745136726","https://openalex.org/W6750665317","https://openalex.org/W6754446152","https://openalex.org/W6762718338","https://openalex.org/W6774314701","https://openalex.org/W6774670964","https://openalex.org/W6778883912","https://openalex.org/W6779326418","https://openalex.org/W6780379688","https://openalex.org/W6783462664","https://openalex.org/W6783591283","https://openalex.org/W6783739589","https://openalex.org/W6784077883","https://openalex.org/W6784095619","https://openalex.org/W6784333009","https://openalex.org/W6785324187","https://openalex.org/W6786091203","https://openalex.org/W6797132756"],"related_works":["https://openalex.org/W1579239598","https://openalex.org/W13191208","https://openalex.org/W2081353817","https://openalex.org/W2940092410","https://openalex.org/W3162284793","https://openalex.org/W2054530658","https://openalex.org/W2593116425","https://openalex.org/W2169680737","https://openalex.org/W3155930628","https://openalex.org/W1033606635","https://openalex.org/W1970724543","https://openalex.org/W1981861329","https://openalex.org/W2803417255","https://openalex.org/W2911113553","https://openalex.org/W3186278103","https://openalex.org/W3197988356","https://openalex.org/W3094496301","https://openalex.org/W2094129054","https://openalex.org/W2947809990","https://openalex.org/W2146632282"],"abstract_inverted_index":{"Inspired":[0],"by":[1],"the":[2,132],"recent":[3],"progress":[4],"in":[5,99,123],"self-supervised":[6,65,80],"learning":[7,24,28,66,72,81],"for":[8,58,71],"computer":[9],"vision":[10],"that":[11,83],"generates":[12],"supervision":[13],"using":[14],"data":[15],"augmentations,":[16],"we":[17,51],"explore":[18],"a":[19,33,107,112],"new":[20],"general-purpose":[21,29,73],"audio":[22,30,35,45,64,74,79,89,102,109],"representation":[23,31],"approach.":[25],"We":[26],"propose":[27],"from":[32,106],"single":[34,108],"segment":[36,103],"without":[37],"expecting":[38],"relationships":[39],"between":[40],"different":[41],"time":[42],"segments":[43,90],"of":[44,87,93,114,134],"samples.":[46],"To":[47],"implement":[48],"this":[49],"principle,":[50],"introduce":[52],"Bootstrap":[53],"Your":[54],"Own":[55],"Latent":[56],"(BYOL)":[57],"Audio":[59],"(BYOL-A,":[60],"pronounced":[61],"\u201cviola\u201d),":[62],"an":[63,100],"method":[67],"based":[68],"on":[69,85],"BYOL":[70],"representation.":[75],"Unlike":[76],"most":[77],"previous":[78],"methods":[82],"rely":[84],"agreement":[86],"vicinity":[88],"or":[91],"disagreement":[92],"remote":[94],"ones,":[95],"BYOL-A":[96,119],"creates":[97],"contrasts":[98],"augmented":[101],"pair":[104],"derived":[105],"segment.":[110],"With":[111],"combination":[113],"normalization":[115],"and":[116,137],"augmentation":[117],"techniques,":[118],"achieves":[120],"state-of-the-art":[121],"results":[122],"various":[124],"downstream":[125],"tasks.":[126],"Extensive":[127],"ablation":[128],"studies":[129],"also":[130],"clarified":[131],"contribution":[133],"each":[135],"component":[136],"their":[138],"combinations.":[139]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5}],"updated_date":"2026-03-21T08:13:44.787528","created_date":"2022-07-25T00:00:00"}
