{"id":"https://openalex.org/W2972613398","doi":"https://doi.org/10.21437/interspeech.2019-1846","title":"Open-Vocabulary Keyword Spotting with Audio and Text Embeddings","display_name":"Open-Vocabulary Keyword Spotting with Audio and Text Embeddings","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972613398","doi":"https://doi.org/10.21437/interspeech.2019-1846","mag":"2972613398"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-1846","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1846","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://infoscience.epfl.ch/record/268742","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022375000","display_name":"Niccol\u00f2 Sacchi","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Niccol\u00f2 Sacchi","raw_affiliation_strings":["\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030657244","display_name":"Alexandre Nanchen","orcid":"https://orcid.org/0000-0003-4441-5892"},"institutions":[{"id":"https://openalex.org/I7495430","display_name":"Idiap Research Institute","ror":"https://ror.org/05932h694","country_code":"CH","type":"facility","lineage":["https://openalex.org/I7495430"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Alexandre Nanchen","raw_affiliation_strings":["Idiap Research Institute"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Idiap Research Institute","institution_ids":["https://openalex.org/I7495430"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073756389","display_name":"Martin Jaggi","orcid":"https://orcid.org/0000-0003-1579-5558"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Martin Jaggi","raw_affiliation_strings":["\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003558571","display_name":"Milo\u0161 Cer\u0148ak","orcid":"https://orcid.org/0000-0002-5569-9491"},"institutions":[{"id":"https://openalex.org/I4210148785","display_name":"Logitech (Switzerland)","ror":"https://ror.org/05pkpss54","country_code":"CH","type":"company","lineage":["https://openalex.org/I4210148785"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Milos Cernak","raw_affiliation_strings":["Logitech Europe S.A"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Logitech Europe S.A","institution_ids":["https://openalex.org/I4210148785"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.6703,"has_fulltext":false,"cited_by_count":34,"citation_normalized_percentile":{"value":0.910098,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3362","last_page":"3366"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.8649537563323975},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7846149206161499},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.7584125995635986},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6584361791610718},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6061713099479675},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4062612056732178},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40090373158454895},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3270668685436249},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1720445454120636}],"concepts":[{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.8649537563323975},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7846149206161499},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.7584125995635986},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6584361791610718},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6061713099479675},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4062612056732178},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40090373158454895},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3270668685436249},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1720445454120636},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2019-1846","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1846","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},{"id":"pmh:oai:infoscience.epfl.ch:268742","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/268742","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:268742","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/268742","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference proceedings"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7200000286102295,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W2024490156","https://openalex.org/W2187089797","https://openalex.org/W2405666970","https://openalex.org/W2407023693","https://openalex.org/W2514741789","https://openalex.org/W2598634450","https://openalex.org/W2746778230","https://openalex.org/W2769912137","https://openalex.org/W2797583228","https://openalex.org/W2891722048","https://openalex.org/W3099206234"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W3119978414","https://openalex.org/W2114097550","https://openalex.org/W2516975559","https://openalex.org/W2545741539","https://openalex.org/W3206647229","https://openalex.org/W4286904253","https://openalex.org/W2000885660","https://openalex.org/W1969408022","https://openalex.org/W1483316057"],"abstract_inverted_index":{"Keyword":[0],"Spotting":[1],"(KWS)":[2],"systems":[3,14,47,152],"allow":[4,57,101,119],"detecting":[5],"a":[6,42,74,111,125,156],"set":[7,21,61,122],"of":[8,22,62,88,123,148,158],"spoken":[9],"(pre-defined)":[10],"keywords.":[11],"Open-vocabulary":[12],"KWS":[13,46,92,151],"search":[15],"for":[16,155],"the":[17,20,60,68,79,86,103,121,149],"keywords":[18,124],"in":[19,106],"word":[23,51],"hypotheses":[24],"generated":[25],"by":[26],"an":[27,89],"automatic":[28],"speech":[29,96],"recognition":[30],"(ASR)":[31],"system":[32,93],"which":[33],"is":[34,143],"computationally":[35,162],"expensive":[36],"and,":[37],"therefore,":[38],"often":[39],"implemented":[40],"as":[41,67],"cloud-based":[43],"service.":[44],"Besides,":[45],"could":[48],"use":[49],"also":[50],"classification":[52],"algorithms":[53],"that":[54,100,140],"do":[55],"not":[56],"easily":[58],"changing":[59],"words":[63],"to":[64,71,108],"be":[65,72],"recognized,":[66],"classes":[69],"have":[70],"defined":[73],"priori,":[75],"even":[76],"before":[77],"training":[78],"system.":[80],"In":[81],"this":[82],"paper,":[83],"we":[84],"propose":[85],"implementation":[87],"open-vocabulary":[90],"ASR-free":[91],"based":[94],"on":[95,135],"and":[97,160],"text":[98],"encoders":[99],"matching":[102],"computed":[104],"embeddings":[105],"order":[107],"spot":[109],"whether":[110],"keyword":[112],"has":[113],"been":[114],"uttered.":[115],"This":[116],"approach":[117],"would":[118],"choosing":[120],"posteriori":[126],"while":[127,153],"requiring":[128],"low":[129],"computational":[130],"power.":[131],"The":[132],"experiments,":[133],"performed":[134],"two":[136],"different":[137],"datasets,":[138],"show":[139],"our":[141],"method":[142],"competitive":[144],"with":[145],"other":[146],"state":[147],"art":[150],"allowing":[154],"flexibility":[157],"configuration":[159],"being":[161],"efficient.":[163]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
