{"id":"https://openalex.org/W7148465776","doi":"https://doi.org/10.1109/asru65441.2025.11434742","title":"ProtoCLAP \u2013 Prototypical Contrastive Language-Audio Pretraining","display_name":"ProtoCLAP \u2013 Prototypical Contrastive Language-Audio Pretraining","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148465776","doi":"https://doi.org/10.1109/asru65441.2025.11434742"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434742","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048720202","display_name":"Adria Mallol-Ragolta","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Adria Mallol-Ragolta","raw_affiliation_strings":["Technical University of Munich University Hospital,CHI &#x2013; Chair of Health Informatics,Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich University Hospital,CHI &#x2013; Chair of Health Informatics,Germany","institution_ids":["https://openalex.org/I62916508"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132809798","display_name":"Bj\u00f6rn Schuller","orcid":null},"institutions":[{"id":"https://openalex.org/I62916508","display_name":"Technical University of Munich","ror":"https://ror.org/02kkvpp62","country_code":"DE","type":"education","lineage":["https://openalex.org/I62916508"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bj\u00f6rn Schuller","raw_affiliation_strings":["Technical University of Munich University Hospital,CHI &#x2013; Chair of Health Informatics,Germany"],"affiliations":[{"raw_affiliation_string":"Technical University of Munich University Hospital,CHI &#x2013; Chair of Health Informatics,Germany","institution_ids":["https://openalex.org/I62916508"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048720202"],"corresponding_institution_ids":["https://openalex.org/I62916508"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.87569262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.15399999916553497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.15399999916553497,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13310","display_name":"Subtitles and Audiovisual Media","score":0.1282999962568283,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.12399999797344208,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.25040000677108765},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.241799995303154},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.23100000619888306},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.2264000028371811}],"concepts":[{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4708000123500824},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.32670000195503235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.303600013256073},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2680000066757202},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.25040000677108765},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.241799995303154},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.23100000619888306},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.2264000028371811},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.22589999437332153}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434742","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4572109282016754,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2096733369","https://openalex.org/W3028563376","https://openalex.org/W3046375318","https://openalex.org/W3198429080","https://openalex.org/W4225288540","https://openalex.org/W4280504571","https://openalex.org/W4294975577","https://openalex.org/W4312877428","https://openalex.org/W4361994820","https://openalex.org/W4372260310","https://openalex.org/W4372266552","https://openalex.org/W4385822277","https://openalex.org/W4385823158","https://openalex.org/W4385823457","https://openalex.org/W4389317971","https://openalex.org/W4392903033","https://openalex.org/W4395467463","https://openalex.org/W4402111518","https://openalex.org/W4402112114","https://openalex.org/W4402115967","https://openalex.org/W4404066630","https://openalex.org/W4404067337"],"related_works":[],"abstract_inverted_index":{"We":[0,64],"propose":[1],"ProtoCLAP,":[2],"a":[3,27],"framework":[4],"that":[5],"integrates":[6],"prototypical":[7,34,62],"representations":[8,25,35],"of":[9,45,86],"the":[10,14,20,23,33,43,46,58,69,73,80,87,103,109,121,125,128,133],"targeted":[11],"classes":[12],"in":[13,26,83],"languageaudio":[15],"contrastive":[16],"learning":[17],"paradigm.":[18],"Projecting":[19],"audio":[21,47,52],"and":[22,49,53,72,95,113],"language":[24,54],"shared":[28],"embeddings":[29,48],"space":[30],"\u2013":[31],"where":[32],"are":[36],"computed":[37],"\u2013,":[38],"ProtoCLAP":[39,78,101,119],"aims":[40],"to":[41],"maximise":[42],"similarity":[44,59],"their":[50],"corresponding":[51],"prototypes,":[55],"while":[56],"enforcing":[57],"between":[60],"both":[61],"representations.":[63],"conduct":[65],"our":[66],"experiments":[67],"on":[68,108,127],"MASCFLICHT":[70],"Corpus":[71],"Second":[74],"DiCOVA":[75],"Challenge":[76],"Dataset.":[77],"achieves":[79],"best":[81,104],"results":[82],"three":[84],"out":[85],"six":[88],"scenarios":[89],"investigated.":[90],"For":[91,116],"face":[92,96],"mask":[93,97],"type":[94],"coverage":[98],"area":[99],"recognition,":[100],"scores":[102],"Unweighted":[105],"Average":[106],"Recall":[107],"test":[110,129],"set,":[111],"62.8%":[112],"56.7%,":[114],"respectively.":[115],"COVID-19":[117],"detection,":[118],"obtains":[120],"highest":[122],"Area":[123],"Under":[124],"Curve":[126],"set":[130],"when":[131],"exploiting":[132],"breathing":[134],"sounds,":[135],"84.77%.":[136]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
