{"id":"https://openalex.org/W3011999425","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023109","title":"Using Convolution and Sequence-discriminative Training to Improving Children Speech Recognition","display_name":"Using Convolution and Sequence-discriminative Training to Improving Children Speech Recognition","publication_year":2019,"publication_date":"2019-11-01","ids":{"openalex":"https://openalex.org/W3011999425","doi":"https://doi.org/10.1109/apsipaasc47483.2019.9023109","mag":"3011999425"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc47483.2019.9023109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023109","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068067286","display_name":"Fanchang Meng","orcid":"https://orcid.org/0000-0002-1684-5003"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fanchang Meng","raw_affiliation_strings":["Beijing Century TAL Education Technology Co., Ltd, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Century TAL Education Technology Co., Ltd, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016099432","display_name":"Shouye Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shouye Peng","raw_affiliation_strings":["Beijing Century TAL Education Technology Co., Ltd, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Century TAL Education Technology Co., Ltd, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100629250","display_name":"Guohui Zhang","orcid":"https://orcid.org/0000-0002-3782-6696"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guohui Zhang","raw_affiliation_strings":["Beijing Century TAL Education Technology Co., Ltd, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Century TAL Education Technology Co., Ltd, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5068067286"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20680603,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"97","issue":null,"first_page":"644","last_page":"649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7764342427253723},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7725099325180054},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7437580227851868},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7109895944595337},{"id":"https://openalex.org/keywords/vocal-tract","display_name":"Vocal tract","score":0.697717547416687},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6570279598236084},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5033628344535828},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49342456459999084},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.47198253870010376},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4456864595413208},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4437465965747833},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.30422234535217285}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7764342427253723},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7725099325180054},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7437580227851868},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7109895944595337},{"id":"https://openalex.org/C47401133","wikidata":"https://www.wikidata.org/wiki/Q748953","display_name":"Vocal tract","level":2,"score":0.697717547416687},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6570279598236084},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5033628344535828},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49342456459999084},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.47198253870010376},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4456864595413208},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4437465965747833},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.30422234535217285},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc47483.2019.9023109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc47483.2019.9023109","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W25255646","https://openalex.org/W88545716","https://openalex.org/W144209617","https://openalex.org/W160885690","https://openalex.org/W173010698","https://openalex.org/W434092020","https://openalex.org/W977860972","https://openalex.org/W1510862247","https://openalex.org/W1524333225","https://openalex.org/W1877570817","https://openalex.org/W1994888226","https://openalex.org/W2015535859","https://openalex.org/W2018383683","https://openalex.org/W2033565080","https://openalex.org/W2046335163","https://openalex.org/W2070011039","https://openalex.org/W2088502005","https://openalex.org/W2100118853","https://openalex.org/W2125234026","https://openalex.org/W2131342762","https://openalex.org/W2133872184","https://openalex.org/W2155273149","https://openalex.org/W2291624303","https://openalex.org/W2507923760","https://openalex.org/W2533523411","https://openalex.org/W2781602865","https://openalex.org/W2898181186","https://openalex.org/W2916979304","https://openalex.org/W2962824709","https://openalex.org/W2963308316","https://openalex.org/W2963828919","https://openalex.org/W6605914981","https://openalex.org/W6607040217","https://openalex.org/W6615059388","https://openalex.org/W6631362777","https://openalex.org/W6678457041","https://openalex.org/W6728841359","https://openalex.org/W6749605826","https://openalex.org/W6755594784"],"related_works":["https://openalex.org/W2100012411","https://openalex.org/W1482212662","https://openalex.org/W3162157266","https://openalex.org/W2162084437","https://openalex.org/W1997579527","https://openalex.org/W3044927199","https://openalex.org/W2102353451","https://openalex.org/W2018086531","https://openalex.org/W1980297060","https://openalex.org/W2387604097"],"abstract_inverted_index":{"The":[0,136],"conclusion":[1],"that":[2],"ASR":[3],"for":[4],"children's":[5,148],"speech":[6,25],"is":[7],"especially":[8],"difficult":[9],"compared":[10],"to":[11,29,58,165],"adult":[12],"was":[13],"given":[14],"by":[15,53],"the":[16,30,47,141,145,153,156,166,171],"robotics":[17],"community":[18],"from":[19],"recent":[20],"works.":[21],"Challenges":[22],"on":[23,39,46,100,155],"Children's":[24],"recognition":[26,48],"mainly":[27],"due":[28],"increased":[31],"variability":[32,146,167],"in":[33,133],"acoustic":[34,105],"and":[35,71,75,94,121,129],"linguistic":[36],"correlates":[37],"depending":[38],"a":[40,160],"young":[41],"age.":[42],"This":[43],"work":[44],"focused":[45],"of":[49,143,147,168],"oral":[50],"English":[51],"spoken":[52],"Chinese":[54],"children":[55,169],"aging":[56],"six":[57],"twelve.":[59],"Experiments":[60],"were":[61,131],"conducted":[62,132],"on:":[63],"(1)":[64,139],"Speaker":[65,95],"Normalization":[66,73,79],"algorithms,":[67],"including":[68],"Cepstral":[69],"Mean":[70],"Variance":[72],"(CMVN)":[74],"Vocal":[76],"Tract":[77],"Length":[78],"(VTLN)":[80],"techniques;":[81],"(2)":[82,152],"Acoustic":[83],"models":[84],"adapting":[85],"techniques,":[86],"such":[87,116,125],"as":[88,117,126],"Maximum":[89],"Likelihood":[90],"Linear":[91],"Transform":[92],"(MLLT)":[93],"Adaptive":[96],"Training":[97,111],"(SAT)":[98],"based":[99],"Constrained":[101],"MLLR;":[102],"(3)":[103],"Different":[104],"models,":[106],"GMM-HMM,":[107],"DNN-HMM,":[108],"CNN-DNN;":[109],"(4)":[110],"criterion,":[112],"with":[113,140],"frame-level":[114],"training":[115,123],"Cross":[118],"entropy":[119],"(CE),":[120],"sequence-discriminative":[122],"(SDT)":[124],"MMI,":[127],"MPE":[128],"sMBR":[130],"this":[134],"paper.":[135],"results":[137],"included:":[138],"increase":[142],"age,":[144],"pronunciation":[149],"decreased":[150],"significantly;":[151],"convolution":[154],"frequency":[157],"axis":[158],"has":[159],"great":[161],"performance":[162],"contribution":[163],"(34.72%)":[164],"over":[170],"baseline":[172],"system.":[173]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
