{"id":"https://openalex.org/W4312097391","doi":"https://doi.org/10.1109/cisp-bmei56279.2022.9980329","title":"Zero-Shot Audio Classification Using Synthesised Classifiers and Pre-Trained Models","display_name":"Zero-Shot Audio Classification Using Synthesised Classifiers and Pre-Trained Models","publication_year":2022,"publication_date":"2022-11-05","ids":{"openalex":"https://openalex.org/W4312097391","doi":"https://doi.org/10.1109/cisp-bmei56279.2022.9980329"},"language":"en","primary_location":{"id":"doi:10.1109/cisp-bmei56279.2022.9980329","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cisp-bmei56279.2022.9980329","pdf_url":null,"source":{"id":"https://openalex.org/S4363605502","display_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101871022","display_name":"Zheng Gu","orcid":"https://orcid.org/0000-0001-9914-3922"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zheng Gu","raw_affiliation_strings":["School of Internet of Things, Nanjing University of Posts and Telecommunications,P. R. China","School of Internet of Things, Nanjing University of Posts and Telecommunications, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Internet of Things, Nanjing University of Posts and Telecommunications,P. R. China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"School of Internet of Things, Nanjing University of Posts and Telecommunications, P. R. China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049684805","display_name":"Xinzhou Xu","orcid":"https://orcid.org/0000-0002-4017-5919"},"institutions":[{"id":"https://openalex.org/I179225836","display_name":"University of Augsburg","ror":"https://ror.org/03p14d497","country_code":"DE","type":"education","lineage":["https://openalex.org/I179225836"]},{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN","DE"],"is_corresponding":false,"raw_author_name":"Xinzhou Xu","raw_affiliation_strings":["School of Internet of Things, Nanjing University of Posts and Telecommunications,P. R. China","Chair of Embedded Intelligence for Health Care and Wellbeing, University of Augsburg, Germany","School of Internet of Things, Nanjing University of Posts and Telecommunications, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Internet of Things, Nanjing University of Posts and Telecommunications,P. R. China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"Chair of Embedded Intelligence for Health Care and Wellbeing, University of Augsburg, Germany","institution_ids":["https://openalex.org/I179225836"]},{"raw_affiliation_string":"School of Internet of Things, Nanjing University of Posts and Telecommunications, P. R. China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100435115","display_name":"Shuo Liu","orcid":"https://orcid.org/0000-0002-9707-5572"},"institutions":[{"id":"https://openalex.org/I179225836","display_name":"University of Augsburg","ror":"https://ror.org/03p14d497","country_code":"DE","type":"education","lineage":["https://openalex.org/I179225836"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Shuo Liu","raw_affiliation_strings":["University of Augsburg,Chair of Embedded Intelligence for Health Care and Wellbeing,Germany","Chair of Embedded Intelligence for Health Care and Wellbeing, University of Augsburg, Germany"],"affiliations":[{"raw_affiliation_string":"University of Augsburg,Chair of Embedded Intelligence for Health Care and Wellbeing,Germany","institution_ids":["https://openalex.org/I179225836"]},{"raw_affiliation_string":"Chair of Embedded Intelligence for Health Care and Wellbeing, University of Augsburg, Germany","institution_ids":["https://openalex.org/I179225836"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043060302","display_name":"Bj\u00f6rn W. Schuller","orcid":"https://orcid.org/0000-0002-6478-8699"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]},{"id":"https://openalex.org/I179225836","display_name":"University of Augsburg","ror":"https://ror.org/03p14d497","country_code":"DE","type":"education","lineage":["https://openalex.org/I179225836"]}],"countries":["DE","GB"],"is_corresponding":false,"raw_author_name":"Bjorn Schuller","raw_affiliation_strings":["University of Augsburg,Chair of Embedded Intelligence for Health Care and Wellbeing,Germany","GLAM - Group on Language, Audio, & Music, Imperial College London, UK","Chair of Embedded Intelligence for Health Care and Wellbeing, University of Augsburg, Germany"],"affiliations":[{"raw_affiliation_string":"University of Augsburg,Chair of Embedded Intelligence for Health Care and Wellbeing,Germany","institution_ids":["https://openalex.org/I179225836"]},{"raw_affiliation_string":"GLAM - Group on Language, Audio, & Music, Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Chair of Embedded Intelligence for Health Care and Wellbeing, University of Augsburg, Germany","institution_ids":["https://openalex.org/I179225836"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101871022"],"corresponding_institution_ids":["https://openalex.org/I41198531"],"apc_list":null,"apc_paid":null,"fwci":0.3682,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.52606635,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13996","display_name":"Diverse Musicological Studies","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1210","display_name":"Music"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8870488405227661},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7997830510139465},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.625112771987915},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6127578020095825},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5730632543563843},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4718102216720581},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4598686397075653},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39390721917152405},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.19099441170692444}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8870488405227661},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7997830510139465},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.625112771987915},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6127578020095825},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5730632543563843},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4718102216720581},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4598686397075653},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39390721917152405},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.19099441170692444}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cisp-bmei56279.2022.9980329","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cisp-bmei56279.2022.9980329","pdf_url":null,"source":{"id":"https://openalex.org/S4363605502","display_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 15th International Congress on Image and Signal Processing, BioMedical Engineering and Informatics (CISP-BMEI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G3189354827","display_name":null,"funder_award_id":"61801241,62071242","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4023624630","display_name":null,"funder_award_id":"BK20180746","funder_id":"https://openalex.org/F4320322769","funder_display_name":"Natural Science Foundation of Jiangsu Province"},{"id":"https://openalex.org/G4416298472","display_name":null,"funder_award_id":"2022M711693","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G7189395530","display_name":null,"funder_award_id":"442218748","funder_id":"https://openalex.org/F4320320879","funder_display_name":"Deutsche Forschungsgemeinschaft"}],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320322769","display_name":"Natural Science Foundation of Jiangsu Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W1972567154","https://openalex.org/W1978274651","https://openalex.org/W1986315484","https://openalex.org/W2052666245","https://openalex.org/W2119662931","https://openalex.org/W2124033848","https://openalex.org/W2289084343","https://openalex.org/W2302255633","https://openalex.org/W2334493732","https://openalex.org/W2526050071","https://openalex.org/W2592944988","https://openalex.org/W2767754137","https://openalex.org/W2910453440","https://openalex.org/W2921950349","https://openalex.org/W2935162632","https://openalex.org/W2939855790","https://openalex.org/W2945478979","https://openalex.org/W2954224973","https://openalex.org/W2962903908","https://openalex.org/W2963194800","https://openalex.org/W2963499153","https://openalex.org/W2963679447","https://openalex.org/W2963723765","https://openalex.org/W2972244714","https://openalex.org/W2972883999","https://openalex.org/W2998351764","https://openalex.org/W2998508940","https://openalex.org/W3015591594","https://openalex.org/W3033182893","https://openalex.org/W3042874137","https://openalex.org/W3045062880","https://openalex.org/W3094197178","https://openalex.org/W3119136678","https://openalex.org/W3138521398","https://openalex.org/W3161217709","https://openalex.org/W3169611941","https://openalex.org/W3196831814","https://openalex.org/W3197301256","https://openalex.org/W3198715247","https://openalex.org/W3205743929","https://openalex.org/W3212436340","https://openalex.org/W4205734456","https://openalex.org/W4206047378","https://openalex.org/W4233923317","https://openalex.org/W4287666817","https://openalex.org/W4294170691","https://openalex.org/W6678360021","https://openalex.org/W6682691769","https://openalex.org/W6746941363","https://openalex.org/W6762533536","https://openalex.org/W6765174816","https://openalex.org/W6779136962","https://openalex.org/W6781150751","https://openalex.org/W6787084273"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W4205463238","https://openalex.org/W2761785940","https://openalex.org/W2110523656","https://openalex.org/W1482209366","https://openalex.org/W1487808658"],"abstract_inverted_index":{"Audio":[0],"classification":[1,44],"equips":[2],"a":[3],"machine":[4],"with":[5,121],"the":[6,10,18,52,97,110,114,117,122],"feature":[7],"of":[8,12,116],"recognising":[9],"source":[11],"an":[13,25,78],"audio":[14,26,32,43,57,71,98],"sample.":[15],"Different":[16],"from":[17,55],"conventional":[19],"setting,":[20],"by":[21],"using":[22,80],"zero-shot":[23,42],"learning,":[24],"classifier":[27],"can":[28],"work":[29],"for":[30,66],"new":[31],"sources":[33],"that":[34],"are":[35],"not":[36],"appearing":[37],"during":[38],"training.":[39],"However,":[40],"current":[41],"methods":[45],"have":[46],"no":[47],"sufficient":[48],"capability":[49],"in":[50,69,87],"retrieving":[51],"discriminative":[53,94],"information":[54],"seen-class":[56],"samples,":[58],"and":[59,84],"hence,":[60],"lead":[61],"to":[62,89],"very":[63],"limited":[64],"performance":[65],"transferring":[67],"knowledge":[68],"representing":[70],"features.":[72],"To":[73],"this":[74],"end,":[75],"we":[76],"propose":[77],"approach":[79],"multiple":[81],"synthesised":[82],"classifiers":[83,95],"pre-trained":[85,102],"models":[86],"order":[88],"jointly":[90],"optimise":[91],"several":[92],"phantom":[93],"on":[96,109],"features":[99],"generated":[100],"through":[101],"ResNet":[103],"models.":[104],"Our":[105],"experimental":[106],"results,":[107],"based":[108],"ESC-50":[111],"dataset,":[112],"validate":[113],"effectiveness":[115],"proposed":[118],"approach,":[119],"compared":[120],"state-of-the-art":[123],"approaches.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-25T21:42:39.735039","created_date":"2025-10-10T00:00:00"}
