{"id":"https://openalex.org/W2944180060","doi":"https://doi.org/10.1109/iscslp.2018.8706631","title":"Keyword Spotting Based On CTC and RNN For Mandarin Chinese Speech","display_name":"Keyword Spotting Based On CTC and RNN For Mandarin Chinese Speech","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2944180060","doi":"https://doi.org/10.1109/iscslp.2018.8706631","mag":"2944180060"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2018.8706631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2018.8706631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101823853","display_name":"Yiyan Wang","orcid":"https://orcid.org/0000-0002-0481-412X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yiyan Wang","raw_affiliation_strings":["Beijing Unisound Information Technology Co., Ltd., Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Unisound Information Technology Co., Ltd., Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056415893","display_name":"Yanhua Long","orcid":"https://orcid.org/0000-0003-0924-408X"},"institutions":[{"id":"https://openalex.org/I21945476","display_name":"Shanghai Normal University","ror":"https://ror.org/01cxqmw89","country_code":"CN","type":"education","lineage":["https://openalex.org/I21945476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanhua Long","raw_affiliation_strings":["SHNU-Unisound Joint Laboratory of Natural Human-Computer Interaction, Shanghai Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"SHNU-Unisound Joint Laboratory of Natural Human-Computer Interaction, Shanghai Normal University, Shanghai, China","institution_ids":["https://openalex.org/I21945476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101823853"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6515,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.78142343,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"1","issue":null,"first_page":"374","last_page":"378"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.9330623149871826},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.8985587358474731},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8312587738037109},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.78380286693573},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7666101455688477},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.7032386660575867},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.6976426839828491},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5316266417503357},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4762711226940155},{"id":"https://openalex.org/keywords/spotting","display_name":"Spotting","score":0.4759601950645447},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.4666324555873871},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39530766010284424}],"concepts":[{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.9330623149871826},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.8985587358474731},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8312587738037109},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.78380286693573},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7666101455688477},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.7032386660575867},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.6976426839828491},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5316266417503357},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4762711226940155},{"id":"https://openalex.org/C2779506182","wikidata":"https://www.wikidata.org/wiki/Q7580141","display_name":"Spotting","level":2,"score":0.4759601950645447},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.4666324555873871},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39530766010284424},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2018.8706631","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2018.8706631","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 11th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.550000011920929,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1815792","https://openalex.org/W1544266773","https://openalex.org/W1993882792","https://openalex.org/W2056986588","https://openalex.org/W2098044214","https://openalex.org/W2102113734","https://openalex.org/W2126203737","https://openalex.org/W2144499799","https://openalex.org/W2160815625","https://openalex.org/W2193413348","https://openalex.org/W2212465773","https://openalex.org/W2407648438","https://openalex.org/W2593910181","https://openalex.org/W2612800309","https://openalex.org/W2754134153","https://openalex.org/W6687566353","https://openalex.org/W6737759470"],"related_works":["https://openalex.org/W2918559346","https://openalex.org/W2114097550","https://openalex.org/W2170821097","https://openalex.org/W4286904253","https://openalex.org/W3119978414","https://openalex.org/W2516975559","https://openalex.org/W2545741539","https://openalex.org/W3206647229","https://openalex.org/W1969408022","https://openalex.org/W2000885660"],"abstract_inverted_index":{"The":[0,69,83],"state-of-the-art":[1,163],"acoustic":[2],"modeling":[3],"for":[4],"Keyword":[5],"Spotting":[6],"(KWS)":[7],"systems":[8],"is":[9,27,71,94],"mainly":[10],"based":[11,72,165],"on":[12,73,122,166],"the":[13,37,40,46,58,64,95,109,114,123,138,162],"hybrid":[14,34],"model":[15],"of":[16,39,66,149],"Hidden":[17],"Markov":[18],"Model":[19],"(HMM)":[20],"and":[21,78,89,98],"Neural":[22,80],"Network":[23,81],"(NN).":[24],"However,":[25],"it":[26],"challenging":[28],"to":[29],"efficiently":[30],"train":[31],"such":[32],"a":[33,53,143],"system,":[35,137],"since":[36],"dependence":[38],"intermediate":[41],"phonetic":[42,67],"representation.":[43],"Motivated":[44],"by":[45],"end-to-end":[47,59,139,155],"speech":[48,126],"recognition":[49],"systems,":[50],"we":[51],"propose":[52],"Mandarin":[54,106,124],"KWS":[55,92,136,140,156],"system":[56,70,88,93,141,157],"using":[57],"method,":[60],"which":[61],"directly":[62],"predict":[63],"posterior":[65],"units.":[68],"Connectionist":[74],"Temporal":[75],"Classifier":[76],"(CTC)":[77],"Recurrent":[79],"(RNN).":[82],"main":[84],"difference":[85],"between":[86],"our":[87],"other":[90],"CTC-based":[91],"output":[96,110],"alphabet":[97],"its":[99],"corresponded":[100],"keyword":[101],"searching":[102],"mechanism.":[103],"We":[104],"adopt":[105],"syllables":[107],"as":[108],"labels,":[111],"rather":[112],"than":[113,161],"phonemes":[115,171],"or":[116,172],"characters.":[117,174],"Extensive":[118],"experiments":[119],"are":[120],"conducted":[121],"Chinese":[125,167,173],"dataset.":[127],"Experimental":[128],"results":[129],"indicate":[130],"that:":[131],"(1)":[132],"Compared":[133],"with":[134],"HMM-based":[135],"achieves":[142],"significant":[144],"improvement,":[145],"without":[146],"any":[147],"increase":[148],"computational":[150],"cost.":[151],"(2)":[152],"Our":[153],"syllable-based":[154],"obtains":[158],"better":[159],"performance":[160],"ones":[164],"context":[168],"independent":[169],"(CI)":[170]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
