{"id":"https://openalex.org/W4392903835","doi":"https://doi.org/10.1109/icassp48485.2024.10447040","title":"Audio-Aided Learning Framework for Image Classification with Limited Training Images","display_name":"Audio-Aided Learning Framework for Image Classification with Limited Training Images","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903835","doi":"https://doi.org/10.1109/icassp48485.2024.10447040"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447040","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10447040","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104085728","display_name":"Qi Wu","orcid":"https://orcid.org/0009-0001-7744-7906"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qi Wu","raw_affiliation_strings":["School Of Computer Science Aed Engineering-Sun Yat-sen University,China","School Of Computer Science Aed Engineering-Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082176355","display_name":"Chengjia Wang","orcid":"https://orcid.org/0000-0003-2345-7364"},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chengjia Wang","raw_affiliation_strings":["Heriot-Watt University,School of Mathematical and Computer Sciences,UK","School of Mathematical and Computer Sciences, Heriot-Watt University, UK"],"affiliations":[{"raw_affiliation_string":"Heriot-Watt University,School of Mathematical and Computer Sciences,UK","institution_ids":["https://openalex.org/I32062511"]},{"raw_affiliation_string":"School of Mathematical and Computer Sciences, Heriot-Watt University, UK","institution_ids":["https://openalex.org/I32062511"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338562","display_name":"Xiaohui Li","orcid":"https://orcid.org/0000-0003-3110-7890"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohui Li","raw_affiliation_strings":["School Of Computer Science Aed Engineering-Sun Yat-sen University,China","School Of Computer Science Aed Engineering-Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101827266","display_name":"Guangxing Wu","orcid":"https://orcid.org/0009-0007-8157-1743"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangxing Wu","raw_affiliation_strings":["School Of Computer Science Aed Engineering-Sun Yat-sen University,China","School Of Computer Science Aed Engineering-Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014979931","display_name":"Marta Vallejo","orcid":"https://orcid.org/0000-0001-9957-954X"},"institutions":[{"id":"https://openalex.org/I32062511","display_name":"Heriot-Watt University","ror":"https://ror.org/04mghma93","country_code":"GB","type":"education","lineage":["https://openalex.org/I32062511"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Marta Vallejo","raw_affiliation_strings":["Heriot-Watt University,School of Mathematical and Computer Sciences,UK","School of Mathematical and Computer Sciences, Heriot-Watt University, UK"],"affiliations":[{"raw_affiliation_string":"Heriot-Watt University,School of Mathematical and Computer Sciences,UK","institution_ids":["https://openalex.org/I32062511"]},{"raw_affiliation_string":"School of Mathematical and Computer Sciences, Heriot-Watt University, UK","institution_ids":["https://openalex.org/I32062511"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100707149","display_name":"Ruixuan Wang","orcid":"https://orcid.org/0000-0002-8714-0369"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruixuan Wang","raw_affiliation_strings":["School Of Computer Science Aed Engineering-Sun Yat-sen University,China","School Of Computer Science Aed Engineering-Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University,China","institution_ids":["https://openalex.org/I157773358"]},{"raw_affiliation_string":"School Of Computer Science Aed Engineering-Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5104085728"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02765563,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4975","last_page":"4979"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9811000227928162,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8301166892051697},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6666451692581177},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5952929258346558},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.5199888348579407},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.498629093170166},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.47666600346565247},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46315282583236694},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4557577669620514},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3743264675140381},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.2804926037788391},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.14912772178649902}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8301166892051697},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6666451692581177},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5952929258346558},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.5199888348579407},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.498629093170166},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.47666600346565247},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46315282583236694},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4557577669620514},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3743264675140381},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2804926037788391},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.14912772178649902},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447040","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp48485.2024.10447040","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1730722784","display_name":null,"funder_award_id":"2023B1515040025","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1809728033","display_name":null,"funder_award_id":"2023B1515040025","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G4413561973","display_name":null,"funder_award_id":"2023A15150","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G4469379859","display_name":null,"funder_award_id":"PCL2023AS7-1","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4548025536","display_name":null,"funder_award_id":"120714","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4871260389","display_name":null,"funder_award_id":"2023A151","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G5167091242","display_name":null,"funder_award_id":"No. 1","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G627220917","display_name":null,"funder_award_id":"51504002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6588135298","display_name":null,"funder_award_id":"12071496","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7012638997","display_name":null,"funder_award_id":"2023A","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G7530273095","display_name":null,"funder_award_id":"2023A1515012097","funder_id":"https://openalex.org/F4320321921","funder_display_name":"Natural Science Foundation of Guangdong Province"},{"id":"https://openalex.org/G761644807","display_name":null,"funder_award_id":"62071502","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8551942893","display_name":null,"funder_award_id":"51501209","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321921","display_name":"Natural Science Foundation of Guangdong Province","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2519210008","https://openalex.org/W2592929672","https://openalex.org/W3035682985","https://openalex.org/W3041133507","https://openalex.org/W3108975329","https://openalex.org/W3162322471","https://openalex.org/W3201915713","https://openalex.org/W4223899585","https://openalex.org/W4312935555","https://openalex.org/W4312956471","https://openalex.org/W4317436377","https://openalex.org/W4390874575","https://openalex.org/W6638319203","https://openalex.org/W6735236233","https://openalex.org/W6736057607","https://openalex.org/W6758126075","https://openalex.org/W6791353385","https://openalex.org/W6810738896"],"related_works":["https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W2142795561","https://openalex.org/W4205302943","https://openalex.org/W2561132942","https://openalex.org/W3155418658","https://openalex.org/W2354332708","https://openalex.org/W2005234362","https://openalex.org/W1997235926"],"abstract_inverted_index":{"It":[0],"is":[1,138,161],"challenging":[2],"to":[3,19,32,69,102,147,171],"train":[4,71,149],"a":[5,57,78,94,206],"generalizable":[6],"deep":[7],"learning":[8,16,59,159,165,191],"classifier":[9,152],"with":[10,153,163,174,200],"limited":[11,154,201],"training":[12,108,155,202],"images.":[13,156],"Existing":[14],"few-shot":[15],"approaches":[17],"try":[18],"improve":[20],"classification":[21,199],"performance":[22],"largely":[23],"by":[24],"transferring":[25],"prior":[26,41,64,145],"knowledge":[27,42,65,146],"from":[28,47,66,128],"upstream":[29,38],"large-sample":[30],"tasks":[31],"the":[33,76,86,103,110,132,139,189,210],"current":[34],"small-sample":[35],"task.":[36],"Besides":[37],"image":[39,73,100,111,114,120,151,185,198],"datasets,":[40],"may":[43],"also":[44],"be":[45,172,216],"obtained":[46],"signals":[48,68],"of":[49,89,109,134,209],"other":[50],"modalities.":[51],"In":[52,75],"this":[53,137],"study,":[54],"we":[55],"propose":[56],"novel":[58],"framework":[60,160,192],"that":[61,142,188],"can":[62,84],"utilize":[63],"audio":[67,82,87,96,105],"help":[70,148],"an":[72,150],"classifier.":[74],"framework,":[77],"pre-trained":[79],"and":[80,183],"fixed":[81],"encoder":[83],"transform":[85],"signal":[88],"each":[90],"class":[91],"label":[92],"into":[93],"class-specific":[95],"prototype.":[97],"By":[98],"attracting":[99],"representations":[101,115,121],"corresponding":[104],"prototypes":[106],"during":[107],"classifier,":[112],"within-class":[113],"become":[116,122],"more":[117],"clustered,":[118],"while":[119],"further":[123],"apart":[124],"if":[125],"they":[126],"are":[127],"different":[129],"classes.":[130],"To":[131],"best":[133],"our":[135],"knowledge,":[136],"first":[140],"work":[141],"utilizes":[143],"audio-based":[144],"The":[157,212],"proposed":[158,190],"compatible":[162],"existing":[164,175,195],"approaches,":[166],"making":[167],"it":[168],"flexible":[169],"enough":[170],"combined":[173],"approaches.":[176],"Extensive":[177],"empirical":[178],"evaluations":[179],"on":[180],"both":[181],"natural":[182],"medical":[184],"datasets":[186],"demonstrate":[187],"significantly":[193],"outperforms":[194],"methods":[196],"in":[197],"images,":[203],"thus":[204],"establishing":[205],"new":[207],"state":[208],"art.":[211],"source":[213],"code":[214],"will":[215],"released":[217],"publicly.":[218]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
