{"id":"https://openalex.org/W3090499921","doi":"https://doi.org/10.1142/s271755452050006x","title":"An End-to-End Model Based on Multiple Neural Networks with Data Augmentation for Keyword Spotting","display_name":"An End-to-End Model Based on Multiple Neural Networks with Data Augmentation for Keyword Spotting","publication_year":2020,"publication_date":"2020-06-01","ids":{"openalex":"https://openalex.org/W3090499921","doi":"https://doi.org/10.1142/s271755452050006x","mag":"3090499921"},"language":"en","primary_location":{"id":"doi:10.1142/s271755452050006x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s271755452050006x","pdf_url":null,"source":{"id":"https://openalex.org/S4210231678","display_name":"International Journal of Asian Language Processing","issn_l":"2424-791X","issn":["2424-791X","2717-5545"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Asian Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036170008","display_name":"Shuzhou Chai","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuzhou Chai","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing 10084, Haidian District, P. R. China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing 10084, Haidian District, P. R. China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100692904","display_name":"Wei-Qiang Zhang","orcid":"https://orcid.org/0000-0003-3841-1959"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei-Qiang Zhang","raw_affiliation_strings":["Beijing National Research Center for Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing 10084, Haidian District, P. R. China"],"affiliations":[{"raw_affiliation_string":"Beijing National Research Center for Information Science and Technology, Department of Electronic Engineering, Tsinghua University, Beijing 10084, Haidian District, P. R. China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031671759","display_name":"Changsheng Lv","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Changsheng Lv","raw_affiliation_strings":["School of Electronic Engineering, Beijing University of Posts and Telecommunications, Beijing 10084, Haidian District, P. R. China"],"affiliations":[{"raw_affiliation_string":"School of Electronic Engineering, Beijing University of Posts and Telecommunications, Beijing 10084, Haidian District, P. R. China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041672656","display_name":"Zhenye Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4800084","display_name":"Southwest Jiaotong University","ror":"https://ror.org/00hn7w693","country_code":"CN","type":"education","lineage":["https://openalex.org/I4800084"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenye Yang","raw_affiliation_strings":["Department of Electronic Engineering in Leeds Joint School, The Southwest Jiaotong University, Chengdu, Pidu District, P. R. China"],"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering in Leeds Joint School, The Southwest Jiaotong University, Chengdu, Pidu District, P. R. China","institution_ids":["https://openalex.org/I4800084"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100692904"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.3031,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.54529296,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"30","issue":"02","first_page":"2050006","last_page":"2050006"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.898161768913269},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.8666281700134277},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7926953434944153},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.6687566637992859},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6334640979766846},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5558291673660278},{"id":"https://openalex.org/keywords/activation-function","display_name":"Activation function","score":0.55535489320755},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.516365647315979},{"id":"https://openalex.org/keywords/time-delay-neural-network","display_name":"Time delay neural network","score":0.5024502277374268},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4977147877216339},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4906217157840729},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4535239636898041},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.44756028056144714},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.4236566424369812}],"concepts":[{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.898161768913269},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.8666281700134277},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7926953434944153},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.6687566637992859},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6334640979766846},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5558291673660278},{"id":"https://openalex.org/C38365724","wikidata":"https://www.wikidata.org/wiki/Q4677469","display_name":"Activation function","level":3,"score":0.55535489320755},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.516365647315979},{"id":"https://openalex.org/C175202392","wikidata":"https://www.wikidata.org/wiki/Q2434543","display_name":"Time delay neural network","level":3,"score":0.5024502277374268},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4977147877216339},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4906217157840729},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4535239636898041},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.44756028056144714},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.4236566424369812}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s271755452050006x","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s271755452050006x","pdf_url":null,"source":{"id":"https://openalex.org/S4210231678","display_name":"International Journal of Asian Language Processing","issn_l":"2424-791X","issn":["2424-791X","2717-5545"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Asian Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.6100000143051147}],"awards":[{"id":"https://openalex.org/G4401023024","display_name":null,"funder_award_id":"BNR2019TD01022","funder_id":"https://openalex.org/F4320329777","funder_display_name":"Beijing National Research Center For Information Science And Technology"},{"id":"https://openalex.org/G6450856609","display_name":null,"funder_award_id":"U1836219","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329777","display_name":"Beijing National Research Center For Information Science And Technology","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1485222997","https://openalex.org/W1734538896","https://openalex.org/W1989674786","https://openalex.org/W1995945045","https://openalex.org/W2034940213","https://openalex.org/W2056986588","https://openalex.org/W2112739286","https://openalex.org/W2122797512","https://openalex.org/W2143612262","https://openalex.org/W2147768505","https://openalex.org/W2587529061","https://openalex.org/W2602634800","https://openalex.org/W2617258110","https://openalex.org/W2919448963","https://openalex.org/W2949736877","https://openalex.org/W2953219395","https://openalex.org/W2962824709","https://openalex.org/W2964187693","https://openalex.org/W3098773154","https://openalex.org/W4253020087"],"related_works":["https://openalex.org/W2758063741","https://openalex.org/W2977314777","https://openalex.org/W2883041339","https://openalex.org/W3000978536","https://openalex.org/W2621864722","https://openalex.org/W2738221750","https://openalex.org/W3156786002","https://openalex.org/W4317242517","https://openalex.org/W3134309788","https://openalex.org/W3090499921"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,134],"propose":[4],"a":[5,130,136,168,175],"network":[6,79,89],"for":[7],"small":[8],"footprint":[9],"keyword":[10],"spotting.":[11],"It":[12],"includes":[13],"four":[14],"parts,":[15],"data":[16,53,57],"augmentation,":[17],"Time-Delay":[18],"Neural":[19,24,28],"Network":[20,25,29],"(TDNN)":[21],"and":[22,31,42,45,55,60,69,91,103,118,139,159],"Convolutional":[23],"(CNN),":[26],"Recurrent":[27,82,94],"(RNN),":[30],"attention":[32,110,151,154],"mechanism.":[33],"Data":[34],"augmentation":[35],"is":[36,112],"Google":[37],"SpecAugment":[38],"with":[39],"time":[40,46,68],"warping":[41],"frequency":[43],"mask":[44,47],"to":[48,114,128,142],"the":[49,63,67,121,126,148],"spectrum":[50],"on":[51],"clean":[52],"set":[54],"noisy":[56],"set.":[58],"TDNN":[59],"CNN":[61],"model":[62,165],"spectrogram":[64],"features":[65,102],"from":[66],"space":[70],"dimensions.":[71],"RNN-type":[72],"networks":[73],"include":[74],"RNN,":[75],"Long":[76,86],"Short-Term":[77,87],"Memory":[78,88],"(LSTM),":[80],"Gated":[81,93],"Unit":[83,95],"(GRU),":[84],"Bidirectional":[85,92],"(BiLSTM),":[90],"(BiGRU).":[96],"The":[97,109],"RNN":[98,127],"extracts":[99],"hidden":[100,160],"layer":[101,161],"transforms":[104],"them":[105],"into":[106],"high-level":[107,122],"representations.":[108],"mechanism":[111],"selected":[113],"generate":[115,143],"different":[116],"weights":[117],"multiplied":[119],"by":[120,125],"representation":[123],"generated":[124],"obtain":[129],"fixed-length":[131],"vector.":[132],"Finally,":[133],"use":[135],"linear":[137,157],"transformation":[138],"softmax":[140],"function":[141],"scores.":[144],"We":[145],"also":[146],"explored":[147],"size":[149],"of":[150,162,172],"mechanism,":[152],"two":[153],"mechanisms,":[155],"rectified":[156],"unit":[158],"RNN.":[163],"Our":[164],"has":[166],"achieved":[167],"true":[169],"positive":[170,178],"rate":[171],"99.81%":[173],"at":[174],"5%":[176],"false":[177],"rate.":[179]},"counts_by_year":[{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
