{"id":"https://openalex.org/W4378194825","doi":"https://doi.org/10.1109/tmm.2023.3280011","title":"Few-Shot Class-Incremental Audio Classification Using Dynamically Expanded Classifier With Self-Attention Modified Prototypes","display_name":"Few-Shot Class-Incremental Audio Classification Using Dynamically Expanded Classifier With Self-Attention Modified Prototypes","publication_year":2023,"publication_date":"2023-05-25","ids":{"openalex":"https://openalex.org/W4378194825","doi":"https://doi.org/10.1109/tmm.2023.3280011"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2023.3280011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3280011","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.19539","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070863631","display_name":"Yanxiong Li","orcid":"https://orcid.org/0000-0003-4362-1125"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yanxiong Li","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055239917","display_name":"Wenchang Cao","orcid":"https://orcid.org/0009-0008-2790-2983"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenchang Cao","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100678496","display_name":"Wei Xie","orcid":"https://orcid.org/0000-0002-7840-5652"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Xie","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100708120","display_name":"Jialong Li","orcid":"https://orcid.org/0009-0002-9742-3044"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jialong Li","raw_affiliation_strings":["School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Electronic and Information Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084672392","display_name":"Emmanouil Benetos","orcid":null},"institutions":[{"id":"https://openalex.org/I166337079","display_name":"Queen Mary University of London","ror":"https://ror.org/026zzn846","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I166337079"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Emmanouil Benetos","raw_affiliation_strings":["School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","School of Electronic Engineering and Computer Science, Queen Mary University of London, E1 4NS London, UK"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, London, U.K","institution_ids":["https://openalex.org/I166337079"]},{"raw_affiliation_string":"School of Electronic Engineering and Computer Science, Queen Mary University of London, E1 4NS London, UK","institution_ids":["https://openalex.org/I166337079"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070863631"],"corresponding_institution_ids":["https://openalex.org/I90610280"],"apc_list":null,"apc_paid":null,"fwci":3.5156,"has_fulltext":true,"cited_by_count":18,"citation_normalized_percentile":{"value":0.93773958,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"26","issue":null,"first_page":"1346","last_page":"1360"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8718821406364441},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6787874698638916},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5077637434005737},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4721403419971466},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4703654944896698},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4442744255065918},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.440433531999588},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3637009859085083},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.3597930669784546},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3232153058052063}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8718821406364441},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6787874698638916},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5077637434005737},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4721403419971466},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4703654944896698},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4442744255065918},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.440433531999588},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3637009859085083},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.3597930669784546},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3232153058052063},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tmm.2023.3280011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2023.3280011","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2305.19539","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.19539","pdf_url":"https://arxiv.org/pdf/2305.19539","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:qmro.qmul.ac.uk:123456789/88344","is_oa":true,"landing_page_url":"https://qmro.qmul.ac.uk/xmlui/handle/123456789/88344","pdf_url":"https://qmro.qmul.ac.uk/xmlui/bitstream/123456789/88344/2/Benetos%20Few-shot%20Class-incremental%20Audio%20Classification%20Using%20Dynamically%20Expanded%20Classifier%20with%20Self-attention%20Modified%20Prototypes%202023%20Accepted.pdf","source":{"id":"https://openalex.org/S4306400530","display_name":"Queen Mary Research Online (Queen Mary University of London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I166337079","host_organization_name":"Queen Mary University of London","host_organization_lineage":["https://openalex.org/I166337079"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.19539","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.19539","pdf_url":"https://arxiv.org/pdf/2305.19539","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.6800000071525574,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1477544716","display_name":null,"funder_award_id":"Guangdong","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3994375257","display_name":null,"funder_award_id":"61771200","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4441624775","display_name":null,"funder_award_id":"51501145","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6173053914","display_name":null,"funder_award_id":"62111530145","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7284265120","display_name":null,"funder_award_id":"2021A1515011454","funder_id":"https://openalex.org/F4320337111","funder_display_name":"Basic and Applied Basic Research Foundation of Guangdong Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320337111","display_name":"Basic and Applied Basic Research Foundation of Guangdong Province","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4378194825.pdf","grobid_xml":"https://content.openalex.org/works/W4378194825.grobid-xml"},"referenced_works_count":61,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2034996255","https://openalex.org/W2103235956","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2593116425","https://openalex.org/W2798836702","https://openalex.org/W2895776009","https://openalex.org/W2962845248","https://openalex.org/W2962999716","https://openalex.org/W2964189064","https://openalex.org/W2968134922","https://openalex.org/W2972313371","https://openalex.org/W2973006577","https://openalex.org/W2980689481","https://openalex.org/W2997208223","https://openalex.org/W3010563117","https://openalex.org/W3015205011","https://openalex.org/W3015591594","https://openalex.org/W3015594652","https://openalex.org/W3016228121","https://openalex.org/W3034309632","https://openalex.org/W3035342403","https://openalex.org/W3035501943","https://openalex.org/W3043615800","https://openalex.org/W3083274258","https://openalex.org/W3117314925","https://openalex.org/W3138521398","https://openalex.org/W3144281354","https://openalex.org/W3145385912","https://openalex.org/W3160262112","https://openalex.org/W3174868646","https://openalex.org/W3175270254","https://openalex.org/W3177494822","https://openalex.org/W3198591403","https://openalex.org/W3207699776","https://openalex.org/W3210574093","https://openalex.org/W4205137627","https://openalex.org/W4205236539","https://openalex.org/W4205571618","https://openalex.org/W4210311961","https://openalex.org/W4211058963","https://openalex.org/W4224927669","https://openalex.org/W4225083923","https://openalex.org/W4238186852","https://openalex.org/W4281261996","https://openalex.org/W4285005022","https://openalex.org/W4297034365","https://openalex.org/W4313187207","https://openalex.org/W4385822464","https://openalex.org/W6631190155","https://openalex.org/W6717697761","https://openalex.org/W6735236233","https://openalex.org/W6736057607","https://openalex.org/W6736723571","https://openalex.org/W6738602802","https://openalex.org/W6739901393","https://openalex.org/W6755950020","https://openalex.org/W6769178842","https://openalex.org/W6780226713","https://openalex.org/W6785405215"],"related_works":["https://openalex.org/W4224878630","https://openalex.org/W2793122029","https://openalex.org/W2082850652","https://openalex.org/W3111953316","https://openalex.org/W2377622881","https://openalex.org/W4378194825","https://openalex.org/W4297903683","https://openalex.org/W2949662989","https://openalex.org/W2160110705","https://openalex.org/W2357678230"],"abstract_inverted_index":{"Most":[0],"existing":[1,55],"methods":[2,56,215,231],"for":[3,39,57,73,183],"audio":[4,11,21,24,37,45,49,58,76,83,135,155,184,187,200],"classification":[5,25,59],"assume":[6],"that":[7,210],"the":[8,54,114,138,171,177],"vocabulary":[9],"of":[10,35,91,96,116,133,141,153,202],"classes":[12,22,38,50,84,156],"to":[13,28,52,169,229],"be":[14,29,61],"classified":[15],"is":[16,113,122,143,226],"fixed.":[17],"When":[18],"novel":[19,44,48,82,154],"(unseen)":[20],"appear,":[23,53],"systems":[26],"need":[27],"retrained":[30],"with":[31,150],"abundant":[32],"labeled":[33],"samples":[34,132,152,162,166,198],"all":[36],"recognizing":[40],"base":[41,134],"(initial)":[42],"and":[43,63,102,105,163,175,192,205,219,235],"classes.":[46,136],"If":[47],"continue":[51],"will":[60],"inefficient":[62],"even":[64],"infeasible.":[65],"In":[66,223],"this":[67],"work,":[68],"we":[69],"propose":[70],"a":[71,103,117,127,146],"method":[72,93,212],"few-shot":[74],"class-incremental":[75],"classification,":[77],"which":[78,121],"can":[79],"continually":[80],"recognize":[81],"without":[85],"forgetting":[86],"old":[87],"ones.":[88],"The":[89,110],"framework":[90],"our":[92,211],"mainly":[94],"consists":[95],"two":[97],"parts:":[98],"an":[99],"embedding":[100,111],"extractor":[101,112],"classifier,":[104,178],"their":[106],"constructions":[107],"are":[108,167,181,194],"decoupled.":[109],"backbone":[115],"ResNet":[118],"based":[119],"network,":[120],"frozen":[123],"after":[124],"construction":[125],"by":[126,145,196],"training":[128],"strategy":[129],"using":[130],"only":[131],"However,":[137],"classifier":[139],"consisting":[140],"prototypes":[142],"expanded":[144],"prototype":[147,172],"adaptation":[148,173],"network":[149,174],"few":[151],"in":[157,216,232],"incremental":[158],"sessions.":[159],"Labeled":[160],"support":[161],"unlabeled":[164],"query":[165],"used":[168],"train":[170],"update":[176],"since":[179],"they":[180],"informative":[182],"classification.":[185],"Three":[186],"datasets,":[188],"named":[189],"NSynth-100,":[190],"FSC-89":[191],"LS-100":[193],"built":[195],"choosing":[197],"from":[199],"corpora":[201],"NSynth,":[203],"FSD-MIX-CLIP":[204],"LibriSpeech,":[206],"respectively.":[207],"Results":[208],"show":[209],"exceeds":[213],"baseline":[214,230],"average":[217],"accuracy":[218],"performance":[220],"dropping":[221],"rate.":[222],"addition,":[224],"it":[225],"competitive":[227],"compared":[228],"computational":[233],"complexity":[234],"memory":[236],"requirement.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
