{"id":"https://openalex.org/W4372340915","doi":"https://doi.org/10.1109/icassp49357.2023.10095663","title":"Halluaudio: Hallucinate Frequency as Concepts For Few-Shot Audio Classification","display_name":"Halluaudio: Hallucinate Frequency as Concepts For Few-Shot Audio Classification","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372340915","doi":"https://doi.org/10.1109/icassp49357.2023.10095663"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095663","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095663","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109589429","display_name":"Zhongjie Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongjie Yu","raw_affiliation_strings":["Wyze Labs, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wyze Labs, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100763639","display_name":"Shuyang Wang","orcid":"https://orcid.org/0000-0001-9471-0801"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuyang Wang","raw_affiliation_strings":["Wyze Labs, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wyze Labs, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100443824","display_name":"Lin Chen","orcid":"https://orcid.org/0009-0006-1832-5550"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin Chen","raw_affiliation_strings":["Wyze Labs, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wyze Labs, Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087258613","display_name":"Zhongwei Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongwei Cheng","raw_affiliation_strings":["Wyze Labs, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wyze Labs, Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3695,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.52723784,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hallucinating","display_name":"Hallucinating","score":0.9611765146255493},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8558404445648193},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.8420378565788269},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7392253875732422},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.6294347643852234},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5178456902503967},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4909851849079132},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4718884229660034},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46083906292915344},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35135912895202637}],"concepts":[{"id":"https://openalex.org/C2911011789","wikidata":"https://www.wikidata.org/wiki/Q130741","display_name":"Hallucinating","level":2,"score":0.9611765146255493},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8558404445648193},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.8420378565788269},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7392253875732422},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.6294347643852234},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5178456902503967},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4909851849079132},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4718884229660034},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46083906292915344},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35135912895202637},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095663","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095663","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2052666245","https://openalex.org/W2101168487","https://openalex.org/W2191779130","https://openalex.org/W2593116425","https://openalex.org/W2625674597","https://openalex.org/W2796346823","https://openalex.org/W2895671740","https://openalex.org/W2936774411","https://openalex.org/W2949879676","https://openalex.org/W2962845248","https://openalex.org/W2962910554","https://openalex.org/W2964105864","https://openalex.org/W2973006577","https://openalex.org/W3015474227","https://openalex.org/W3109858448","https://openalex.org/W4294831657","https://openalex.org/W4307823382","https://openalex.org/W4312813914","https://openalex.org/W6638319203","https://openalex.org/W6717697761","https://openalex.org/W6735236233","https://openalex.org/W6736057607","https://openalex.org/W6755766585","https://openalex.org/W6756669467","https://openalex.org/W6758126075","https://openalex.org/W6787324024","https://openalex.org/W6788709931"],"related_works":["https://openalex.org/W3204676188","https://openalex.org/W2905433371","https://openalex.org/W4378771713","https://openalex.org/W2992803471","https://openalex.org/W4361193272","https://openalex.org/W4310278675","https://openalex.org/W2312876277","https://openalex.org/W2930227984","https://openalex.org/W2066036438","https://openalex.org/W2078454190"],"abstract_inverted_index":{"Few-shot":[0],"audio":[1,28,53,112],"classification":[2],"is":[3],"an":[4],"emerging":[5],"topic":[6],"that":[7,92],"attracts":[8],"more":[9,11],"and":[10,30,55,63,73,97,104],"attention":[12],"from":[13,38],"the":[14,21,24,27,34,80,84,110],"research":[15],"community.":[16],"Most":[17],"existing":[18],"work":[19],"ignores":[20],"specificity":[22],"of":[23,26,50],"form":[25],"spectrogram":[29],"focuses":[31],"largely":[32],"on":[33,71],"embedding":[35],"space":[36],"borrowed":[37],"image":[39],"tasks,":[40],"while":[41],"in":[42],"this":[43,51],"work,":[44],"we":[45],"aim":[46],"to":[47],"take":[48],"advantage":[49],"special":[52],"format":[54],"propose":[56],"a":[57,87],"new":[58,107],"method":[59,82,94],"by":[60,86],"hallucinating":[61],"high-frequency":[62,96],"low-frequency":[64,98],"parts":[65,99],"as":[66],"structured":[67],"concepts.":[68],"Extensive":[69],"experiments":[70],"ESC50":[72],"our":[74,93],"curated":[75],"balanced":[76],"Kaggle18":[77],"dataset":[78],"show":[79],"proposed":[81],"outperforms":[83],"baseline":[85],"notable":[88],"margin.":[89],"The":[90],"way":[91],"hallucinates":[95],"also":[100],"enables":[101],"its":[102],"interpretability":[103],"opens":[105],"up":[106],"potentials":[108],"for":[109],"few-shot":[111],"classification.":[113]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
