{"id":"https://openalex.org/W4392909369","doi":"https://doi.org/10.1109/icassp48485.2024.10446472","title":"Audio-Free Prompt Tuning for Language-Audio Models","display_name":"Audio-Free Prompt Tuning for Language-Audio Models","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392909369","doi":"https://doi.org/10.1109/icassp48485.2024.10446472"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446472","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446472","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100346326","display_name":"Yiming Li","orcid":"https://orcid.org/0000-0002-1284-7773"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiming Li","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences,Beijing Key Laboratory of Mobile Computing and Pervasive Device,Beijing,China","University of Chinese Academy of Sciences, Beijing, China","Beijing Key Laboratory of Mobile Computing and Pervasive Device, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences,Beijing Key Laboratory of Mobile Computing and Pervasive Device,Beijing,China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Beijing Key Laboratory of Mobile Computing and Pervasive Device, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100402345","display_name":"Xiangdong Wang","orcid":"https://orcid.org/0000-0002-4226-3250"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangdong Wang","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences,Beijing Key Laboratory of Mobile Computing and Pervasive Device,Beijing,China","Beijing Key Laboratory of Mobile Computing and Pervasive Device, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences,Beijing Key Laboratory of Mobile Computing and Pervasive Device,Beijing,China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Beijing Key Laboratory of Mobile Computing and Pervasive Device, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100410374","display_name":"Hong Liu","orcid":"https://orcid.org/0000-0003-4524-495X"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Liu","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences,Beijing Key Laboratory of Mobile Computing and Pervasive Device,Beijing,China","Beijing Key Laboratory of Mobile Computing and Pervasive Device, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences,Beijing Key Laboratory of Mobile Computing and Pervasive Device,Beijing,China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Beijing Key Laboratory of Mobile Computing and Pervasive Device, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100346326"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":1.1251,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.74724275,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"491","last_page":"495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9872000217437744,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.9480000138282776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7750023603439331},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4421144723892212}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7750023603439331},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4421144723892212}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446472","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446472","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2038484192","https://openalex.org/W2052666245","https://openalex.org/W2593116425","https://openalex.org/W2601450892","https://openalex.org/W2896457183","https://openalex.org/W2959539607","https://openalex.org/W2963346784","https://openalex.org/W2965373594","https://openalex.org/W2982343573","https://openalex.org/W3015591594","https://openalex.org/W3094550259","https://openalex.org/W3166396011","https://openalex.org/W3176445421","https://openalex.org/W3198377975","https://openalex.org/W4226442948","https://openalex.org/W4284898017","https://openalex.org/W4292779060","https://openalex.org/W4293342670","https://openalex.org/W4293575120","https://openalex.org/W4312310776","https://openalex.org/W4313175608","https://openalex.org/W4372260310","https://openalex.org/W4372266156","https://openalex.org/W4372266552","https://openalex.org/W4385823125","https://openalex.org/W4386071547","https://openalex.org/W4400033239","https://openalex.org/W6630792627","https://openalex.org/W6735236233","https://openalex.org/W6755207826","https://openalex.org/W6766673545","https://openalex.org/W6778883912","https://openalex.org/W6791353385","https://openalex.org/W6841982715","https://openalex.org/W6843026064"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Contrastive":[0],"Language-Audio":[1],"Pretraining":[2],"(CLAP)":[3],"is":[4,108,159,172],"pre-trained":[5],"to":[6,19,27,50,93,111],"associate":[7],"audio":[8],"features":[9],"with":[10],"human":[11],"language,":[12],"making":[13],"it":[14,147],"a":[15,79,104],"natural":[16],"zero-shot":[17,142],"classifier":[18],"recognize":[20],"unseen":[21,145],"sound":[22],"categories.":[23],"To":[24],"adapt":[25],"CLAP":[26,128],"downstream":[28,167],"tasks,":[29],"prior":[30],"works":[31],"inevitably":[32],"require":[33],"labeled":[34],"domain":[35],"audios,":[36,87],"which":[37,88],"limits":[38],"their":[39],"scalability":[40],"under":[41],"data":[42],"scarcity":[43],"and":[44,114,129,137],"deprives":[45],"them":[46],"of":[47,86],"the":[48,55,63,90,96,127,153,166],"capability":[49],"detect":[51],"novel":[52],"classes":[53,98],"as":[54,99],"original":[56],"CLAP.":[57,155],"In":[58],"this":[59],"work,":[60],"by":[61],"leveraging":[62],"modality":[64],"alignment":[65],"in":[66],"CLAP,":[67],"we":[68],"propose":[69],"an":[70],"efficient":[71],"audio-free":[72],"prompt":[73,81,106],"tuning":[74],"scheme":[75],"aimed":[76],"at":[77,174],"optimizing":[78],"few":[80],"tokens":[82],"from":[83],"texts":[84],"instead":[85],"regularizes":[89],"model":[91,135],"space":[92],"avoid":[94],"overfitting":[95],"seen":[97],"well.":[100],"Based":[101],"on":[102,118,134,144],"this,":[103],"multi-grained":[105],"design":[107],"further":[109],"explored":[110],"fuse":[112],"global":[113],"local":[115],"information.":[116],"Experiments":[117],"several":[119],"tasks":[120],"demonstrate":[121],"that":[122],"our":[123,157],"approach":[124],"can":[125],"boost":[126],"outperform":[130],"other":[131],"training":[132,138],"methods":[133],"performance":[136],"efficiency.":[139],"While":[140],"conducting":[141],"inference":[143],"categories,":[146],"still":[148],"shows":[149],"better":[150],"transferability":[151],"than":[152],"vanilla":[154],"Moreover,":[156],"method":[158],"flexible":[160],"enough":[161],"even":[162],"if":[163],"only":[164],"knowing":[165],"class":[168],"names.":[169],"The":[170],"code":[171],"available":[173],"https://github.com/Ming-er/Audio-Free-P-Tuning.":[175]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
