{"id":"https://openalex.org/W2074339272","doi":"https://doi.org/10.1109/iscslp.2014.6936674","title":"Mandarin speech recognition using convolution neural network with augmented tone features","display_name":"Mandarin speech recognition using convolution neural network with augmented tone features","publication_year":2014,"publication_date":"2014-09-01","ids":{"openalex":"https://openalex.org/W2074339272","doi":"https://doi.org/10.1109/iscslp.2014.6936674","mag":"2074339272"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2014.6936674","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2014.6936674","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 9th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041446566","display_name":"Xinhui Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Xinhui Hu","raw_affiliation_strings":["National Institute of Information and Communications Technology, Kyoto, Japan","National Institute of Information and Communications Technology, Hikaridai 3-5, Seikacho, Sourakugun, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]},{"raw_affiliation_string":"National Institute of Information and Communications Technology, Hikaridai 3-5, Seikacho, Sourakugun, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034792613","display_name":"Xugang Lu","orcid":"https://orcid.org/0000-0001-7075-448X"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xugang Lu","raw_affiliation_strings":["National Institute of Information and Communications Technology, Kyoto, Japan","National Institute of Information and Communications Technology, Hikaridai 3-5, Seikacho, Sourakugun, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]},{"raw_affiliation_string":"National Institute of Information and Communications Technology, Hikaridai 3-5, Seikacho, Sourakugun, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001601327","display_name":"Chiori Hori","orcid":"https://orcid.org/0000-0002-4201-7578"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Chiori Hori","raw_affiliation_strings":["National Institute of Information and Communications Technology, Kyoto, Japan","National Institute of Information and Communications Technology, Hikaridai 3-5, Seikacho, Sourakugun, Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]},{"raw_affiliation_string":"National Institute of Information and Communications Technology, Hikaridai 3-5, Seikacho, Sourakugun, Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041446566"],"corresponding_institution_ids":["https://openalex.org/I90023481"],"apc_list":null,"apc_paid":null,"fwci":2.4541,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.90877488,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"2008","issue":null,"first_page":"15","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7961902022361755},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7877196073532104},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7288457155227661},{"id":"https://openalex.org/keywords/tone","display_name":"Tone (literature)","score":0.7126508951187134},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.696830153465271},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.536644458770752},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4291188418865204},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4062083065509796},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.05436721444129944}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7961902022361755},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7877196073532104},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7288457155227661},{"id":"https://openalex.org/C2780583480","wikidata":"https://www.wikidata.org/wiki/Q1366327","display_name":"Tone (literature)","level":2,"score":0.7126508951187134},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.696830153465271},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.536644458770752},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4291188418865204},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4062083065509796},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.05436721444129944},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp.2014.6936674","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2014.6936674","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 9th International Symposium on Chinese Spoken Language Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6800000071525574,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W198520083","https://openalex.org/W1524333225","https://openalex.org/W1631260214","https://openalex.org/W1872489089","https://openalex.org/W1993882792","https://openalex.org/W2012897754","https://openalex.org/W2076794394","https://openalex.org/W2087395764","https://openalex.org/W2112739286","https://openalex.org/W2115585811","https://openalex.org/W2129070077","https://openalex.org/W2134557905","https://openalex.org/W2135431242","https://openalex.org/W2155273149","https://openalex.org/W2188183693","https://openalex.org/W2394932179","https://openalex.org/W4293460823","https://openalex.org/W6631362777","https://openalex.org/W6636811518","https://openalex.org/W6679097437"],"related_works":["https://openalex.org/W2035430659","https://openalex.org/W2154409506","https://openalex.org/W2032265864","https://openalex.org/W1970614244","https://openalex.org/W102188125","https://openalex.org/W2042455139","https://openalex.org/W3134996568","https://openalex.org/W2351111486","https://openalex.org/W239487380","https://openalex.org/W2330223232"],"abstract_inverted_index":{"Due":[0],"to":[1,29,41,78,95,109],"its":[2,106],"ability":[3],"of":[4,63,75,99,115],"reducing":[5],"spectral":[6,10],"variations":[7,160],"and":[8,61,127,141,157,178],"modeling":[9,26],"correlations":[11],"existed":[12],"in":[13,25,72,82,135,166],"speech":[14,27,43,87,93,125,132],"signals,":[15],"the":[16,57,79,83,97,100,110,136,144,149,186],"convolutional":[17],"neural":[18,31],"network":[19,32],"(CNN)":[20],"has":[21],"been":[22,69],"shown":[23],"effective":[24,58],"compared":[28,184],"deep":[30],"(DNN).":[33],"In":[34],"this":[35],"study,":[36],"we":[37],"explore":[38],"applying":[39,64],"CNN":[40,48,104,167],"Mandarin":[42,91],"recognitions.":[44],"Besides":[45],"exploring":[46],"appropriate":[47],"architecture":[49],"for":[50,122,129,176,180],"recognition":[51,88,94],"performance,":[52],"focuses":[53],"are":[54,162],"on":[55,90],"investigating":[56],"acoustic":[59,76,80],"features,":[60],"effectivenesses":[62,98],"tonal":[65,145],"information":[66,146],"which":[67],"have":[68],"verified":[70],"helpful":[71,165],"other":[73],"types":[74],"models":[77],"features":[81],"CNN.":[84],"We":[85],"conduct":[86],"experiments":[89],"broadcast":[92,123,130],"test":[96],"proposed":[101],"approaches.":[102],"The":[103],"shows":[105],"clear":[107],"superiority":[108],"DNN,":[111],"with":[112,185],"relative":[113,171],"reductions":[114,173],"character":[116],"error":[117],"rate":[118],"(CER)":[119],"among":[120],"7.7-13.1%":[121],"news":[124],"(BN),":[126],"5.4-9.9%":[128],"conversation":[131],"(BC).":[133],"Like":[134],"Gaussian":[137],"Mixture":[138],"Model":[139],"(GMM)":[140],"DNN":[142],"systems,":[143],"characterized":[147],"by":[148],"fundamental":[150,158],"frequency":[151,159],"(F":[152],"<sub":[153],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[154],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">0</sub>":[155],")":[156],"(FFV)":[161],"found":[163],"still":[164],"models,":[168],"they":[169],"achieve":[170],"CER":[172],"over":[174],"6.7%":[175],"BN":[177],"4.3%":[179],"BC":[181],"respectively":[182],"when":[183],"baseline":[187],"Mel-filter":[188],"bank":[189],"feature.":[190]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
