{"id":"https://openalex.org/W4408353155","doi":"https://doi.org/10.1109/icassp49660.2025.10890706","title":"No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging","display_name":"No Class Left Behind: A Closer Look at Class Balancing for Audio Tagging","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353155","doi":"https://doi.org/10.1109/icassp49660.2025.10890706"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890706","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890706","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055580486","display_name":"Janek Ebbers","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Janek Ebbers","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102812631","display_name":"Fran\u00e7ois G. Germain","orcid":"https://orcid.org/0000-0002-8973-5315"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fran\u00e7ois G. Germain","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040065298","display_name":"Kevin Wilkinghoff","orcid":"https://orcid.org/0000-0003-4200-9129"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Wilkinghoff","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086940921","display_name":"Gordon Wichern","orcid":"https://orcid.org/0000-0002-8597-6795"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gordon Wichern","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064097430","display_name":"Jonathan Le Roux","orcid":"https://orcid.org/0000-0002-0158-2837"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan Le Roux","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories (MERL),Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210159266"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03790617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11587","display_name":"Second Language Acquisition and Learning","score":0.9121999740600586,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11587","display_name":"Second Language Acquisition and Learning","score":0.9121999740600586,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.7364790439605713},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7117583155632019},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.3588562607765198},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2545739412307739}],"concepts":[{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.7364790439605713},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7117583155632019},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.3588562607765198},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2545739412307739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890706","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890706","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2118978333","https://openalex.org/W2339172597","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2936774411","https://openalex.org/W2984935418","https://openalex.org/W3094550259","https://openalex.org/W3196974791","https://openalex.org/W3205475937","https://openalex.org/W3205743929","https://openalex.org/W3206996142","https://openalex.org/W4297841853","https://openalex.org/W4312453657","https://openalex.org/W4372260403","https://openalex.org/W4372260579","https://openalex.org/W4375839990","https://openalex.org/W4375869067","https://openalex.org/W4392904001","https://openalex.org/W4392904573","https://openalex.org/W4401023891","https://openalex.org/W6638667902","https://openalex.org/W6726497184","https://openalex.org/W6754940743","https://openalex.org/W6755977528","https://openalex.org/W6757817989","https://openalex.org/W6802678913","https://openalex.org/W6810510949","https://openalex.org/W6840200333","https://openalex.org/W6848208918"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large-scale":[0],"audio":[1,14,123],"tagging":[2,70,87],"datasets":[3],"like":[4],"AudioSet":[5],"usually":[6],"suffer":[7],"from":[8,49,107,125,142,171],"severe":[9],"class":[10,154],"imbalance":[11],"comprising":[12],"many":[13],"examples":[15,23,48],"for":[16,112,148],"common":[17,44],"sound":[18,26],"classes":[19,51,98,127],"but":[20],"only":[21],"few":[22],"of":[24,62,93,153],"rare":[25,50,126],"classes.":[27],"The":[28],"latter,":[29],"however,":[30],"may":[31],"yet":[32],"be":[33],"equally":[34],"or":[35,101],"even":[36],"more":[37,52],"important":[38],"to":[39,46],"recognize.":[40],"Therefore,":[41],"it":[42,82],"is":[43,104,156],"practice":[45],"sample":[47],"frequently":[53],"during":[54],"training.":[55],"At":[56],"the":[57,60,150],"same":[58],"time,":[59],"effects":[61],"such":[63],"balancing":[64,94,110,155],"on":[65],"a":[66,105,160,168],"model\u2019s":[67],"training":[68,84,167],"and":[69,86,95,130],"performance":[71],"are":[72],"still":[73],"little":[74],"understood.":[75],"In":[76],"this":[77],"work,":[78],"we":[79,116,145],"investigate":[80,96,117],"how":[81],"affects":[83],"convergence":[85],"performance.":[88],"We":[89],"consider":[90],"varying":[91],"degrees":[92],"whether":[97],"converge":[99],"simultaneously":[100],"if":[102],"there":[103],"benefit":[106],"selecting":[108],"different":[109,157],"rates":[111],"each":[113],"class.":[114],"Furthermore,":[115],"data":[118,140],"efficient":[119],"oversampling,":[120],"which":[121],"keeps":[122],"files":[124],"in":[128,133],"memory,":[129],"repeats":[131],"them":[132],"close":[134],"succession":[135],"over":[136],"multiple":[137],"batches,":[138],"minimizing":[139],"loading":[141],"disk.":[143],"Finally,":[144],"show":[146],"that":[147],"AudioSet,":[149],"optimal":[151],"amount":[152],"when":[158],"fine-tuning":[159],"model":[161,170],"pre-trained":[162],"via":[163],"self-supervised":[164],"learning,":[165],"versus":[166],"supervised":[169],"scratch.":[172]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
