{"id":"https://openalex.org/W4392902913","doi":"https://doi.org/10.1109/icassp48485.2024.10446557","title":"Maximum-Entropy Adversarial Audio Augmentation for Keyword Spotting","display_name":"Maximum-Entropy Adversarial Audio Augmentation for Keyword Spotting","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902913","doi":"https://doi.org/10.1109/icassp48485.2024.10446557"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446557","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446557","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059973627","display_name":"Zuzhao Ye","orcid":"https://orcid.org/0000-0002-0428-662X"},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zuzhao Ye","raw_affiliation_strings":["Amazon Inc","University of California, Riverside"],"affiliations":[{"raw_affiliation_string":"Amazon Inc","institution_ids":[]},{"raw_affiliation_string":"University of California, Riverside","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044014409","display_name":"Gregory Ciccarelli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gregory Ciccarelli","raw_affiliation_strings":["Amazon Inc"],"affiliations":[{"raw_affiliation_string":"Amazon Inc","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049134079","display_name":"Brian Kulis","orcid":"https://orcid.org/0000-0002-1704-3838"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brian Kulis","raw_affiliation_strings":["Amazon Inc","Boston University"],"affiliations":[{"raw_affiliation_string":"Amazon Inc","institution_ids":[]},{"raw_affiliation_string":"Boston University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5059973627"],"corresponding_institution_ids":["https://openalex.org/I103635307"],"apc_list":null,"apc_paid":null,"fwci":1.8752,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.8496627,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10826","last_page":"10830"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.9156060218811035},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8485777974128723},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.8137965202331543},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.6817739605903625},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5757690072059631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5413357019424438},{"id":"https://openalex.org/keywords/simplicity","display_name":"Simplicity","score":0.4960866868495941},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.481344074010849},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4593859016895294},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43946707248687744},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4242810606956482},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3942645192146301},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08288329839706421}],"concepts":[{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.9156060218811035},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8485777974128723},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.8137965202331543},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.6817739605903625},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5757690072059631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5413357019424438},{"id":"https://openalex.org/C2776372474","wikidata":"https://www.wikidata.org/wiki/Q508291","display_name":"Simplicity","level":2,"score":0.4960866868495941},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.481344074010849},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4593859016895294},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43946707248687744},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4242810606956482},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3942645192146301},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08288329839706421},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446557","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446557","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2038484192","https://openalex.org/W2052666245","https://openalex.org/W2407023693","https://openalex.org/W2797583228","https://openalex.org/W2888797456","https://openalex.org/W2936774411","https://openalex.org/W2951852399","https://openalex.org/W2954996726","https://openalex.org/W2986381065","https://openalex.org/W3001197829","https://openalex.org/W3011702533","https://openalex.org/W3092753908","https://openalex.org/W3096662840","https://openalex.org/W3196782138","https://openalex.org/W3205219536","https://openalex.org/W4206030489","https://openalex.org/W4206857759","https://openalex.org/W4288363925","https://openalex.org/W6631190155","https://openalex.org/W6640425456","https://openalex.org/W6750665317","https://openalex.org/W6755069125","https://openalex.org/W6761100157","https://openalex.org/W6773005947","https://openalex.org/W6784146242","https://openalex.org/W6911193807"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W2114097550","https://openalex.org/W4286904253","https://openalex.org/W3119978414","https://openalex.org/W2516975559","https://openalex.org/W3206647229","https://openalex.org/W1969408022","https://openalex.org/W2000885660","https://openalex.org/W2545741539","https://openalex.org/W1989658893"],"abstract_inverted_index":{"Data":[0],"augmentation":[1,27,58],"is":[2,16,111],"a":[3,49,54],"key":[4],"tool":[5],"for":[6,34],"improving":[7],"the":[8,63,66,69,74,85,88,92,138],"performance":[9],"of":[10,65,68,87],"deep":[11],"networks,":[12],"particularly":[13],"when":[14,135],"there":[15,39],"limited":[17],"labeled":[18],"data.":[19],"In":[20],"some":[21],"fields,":[22],"such":[23],"as":[24,48,103,105],"computer":[25],"vision,":[26],"methods":[28,43],"have":[29],"been":[30],"extensively":[31],"studied;":[32],"however,":[33],"speech":[35],"and":[36,56,76,132],"audio":[37,107],"data,":[38],"are":[40],"relatively":[41],"fewer":[42],"developed.":[44],"Using":[45],"adversarial":[46,122],"learning":[47],"starting":[50],"point,":[51],"we":[52],"develop":[53],"simple":[55],"effective":[57],"strategy":[59],"based":[60],"on":[61,98],"taking":[62],"gradient":[64,89],"entropy":[67],"outputs":[70],"with":[71,137],"respect":[72],"to":[73,90,113,143],"inputs":[75],"then":[77],"creating":[78],"new":[79],"data":[80],"points":[81],"by":[82],"moving":[83],"in":[84],"direction":[86],"maximize":[91],"entropy.":[93],"We":[94],"validate":[95],"its":[96,127],"efficacy":[97],"several":[99],"keyword":[100],"spotting":[101],"tasks":[102],"well":[104],"standard":[106],"benchmarks.":[108],"Our":[109],"method":[110],"straightforward":[112],"implement,":[114],"offering":[115],"greater":[116],"computational":[117],"efficiency":[118],"than":[119],"more":[120],"complex":[121],"schemes":[123],"like":[124],"GANs.":[125],"Despite":[126],"simplicity,":[128],"it":[129],"proves":[130],"robust":[131],"effective,":[133],"especially":[134],"combined":[136],"established":[139],"SpecAugment":[140],"technique,":[141],"leading":[142],"enhanced":[144],"performance.":[145]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
