{"id":"https://openalex.org/W2113890879","doi":"https://doi.org/10.1109/icme.2011.6011941","title":"Reliable accent specific unit generation with dynamic Gaussian mixture selection for multi-accent speech recognition","display_name":"Reliable accent specific unit generation with dynamic Gaussian mixture selection for multi-accent speech recognition","publication_year":2011,"publication_date":"2011-07-01","ids":{"openalex":"https://openalex.org/W2113890879","doi":"https://doi.org/10.1109/icme.2011.6011941","mag":"2113890879"},"language":"en","primary_location":{"id":"doi:10.1109/icme.2011.6011941","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme.2011.6011941","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Multimedia and Expo","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100460131","display_name":"Chao Zhang","orcid":"https://orcid.org/0000-0002-0568-9922"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Zhang","raw_affiliation_strings":["Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China","Department of Computer Science and Technology, Tsinghua University, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100330618","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0003-1399-7420"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069555295","display_name":"Yunqing Xia","orcid":"https://orcid.org/0009-0005-8608-574X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunqing Xia","raw_affiliation_strings":["Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084318285","display_name":"Thomas Fang Zheng","orcid":"https://orcid.org/0000-0002-0249-4767"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thomas Fang Zheng","raw_affiliation_strings":["Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center of Speech and Language Technologies, Division of Technology Innovation and Development, Tsinghua National Laboratory for Information Science and Technology, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110111869","display_name":"Jesper \u00d8. Olsen","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099903","display_name":"Nokia (China)","ror":"https://ror.org/01607kg94","country_code":"CN","type":"company","lineage":["https://openalex.org/I2738502077","https://openalex.org/I4210099903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jesper Olsen","raw_affiliation_strings":["Nokia Research Center, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nokia Research Center, Beijing, China","institution_ids":["https://openalex.org/I4210099903"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103093321","display_name":"Jilei Tian","orcid":"https://orcid.org/0000-0002-4588-9398"},"institutions":[{"id":"https://openalex.org/I4210099903","display_name":"Nokia (China)","ror":"https://ror.org/01607kg94","country_code":"CN","type":"company","lineage":["https://openalex.org/I2738502077","https://openalex.org/I4210099903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"JiLei Tian","raw_affiliation_strings":["Nokia Research Center, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nokia Research Center, Beijing, China","institution_ids":["https://openalex.org/I4210099903"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3189,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.84697679,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7954339385032654},{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.7612923383712769},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7275003790855408},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.669237494468689},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.6369985938072205},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5391479134559631},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5290077328681946},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5142138004302979},{"id":"https://openalex.org/keywords/syllable","display_name":"Syllable","score":0.48250532150268555},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.4443420171737671},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43063998222351074},{"id":"https://openalex.org/keywords/mixture-model","display_name":"Mixture model","score":0.42515069246292114},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4176659882068634},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3673565089702606},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14075899124145508},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12440860271453857}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7954339385032654},{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.7612923383712769},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7275003790855408},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.669237494468689},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6369985938072205},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5391479134559631},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5290077328681946},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5142138004302979},{"id":"https://openalex.org/C109089402","wikidata":"https://www.wikidata.org/wiki/Q8188","display_name":"Syllable","level":2,"score":0.48250532150268555},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.4443420171737671},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43063998222351074},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.42515069246292114},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4176659882068634},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3673565089702606},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14075899124145508},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12440860271453857},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme.2011.6011941","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme.2011.6011941","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Multimedia and Expo","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W86944614","https://openalex.org/W88081813","https://openalex.org/W105045830","https://openalex.org/W117083981","https://openalex.org/W184039015","https://openalex.org/W1996545497","https://openalex.org/W2042281066","https://openalex.org/W2076960297","https://openalex.org/W2113419766","https://openalex.org/W2115900267","https://openalex.org/W6603616073","https://openalex.org/W6607605394"],"related_works":["https://openalex.org/W2031478549","https://openalex.org/W3048205211","https://openalex.org/W2222951281","https://openalex.org/W2272290179","https://openalex.org/W156219719","https://openalex.org/W2068412075","https://openalex.org/W2163874654","https://openalex.org/W2061937230","https://openalex.org/W2132658536","https://openalex.org/W2032826752"],"abstract_inverted_index":{"Multiple":[0],"accents":[1,117],"are":[2],"often":[3],"present":[4],"in":[5,67,89],"Mandarin":[6,13],"speech,":[7],"as":[8,14],"most":[9],"Chinese":[10,116],"have":[11],"learned":[12],"a":[15,59,84],"second":[16],"language.":[17],"We":[18],"propose":[19],"generating":[20],"reliable":[21],"accent":[22,48,87],"specific":[23],"unit":[24,44],"together":[25],"with":[26],"dynamic":[27],"Gaussian":[28,54,73],"mixture":[29,55,74],"selection":[30,56],"for":[31,63,83],"multi-accent":[32],"speech":[33],"recognition.":[34],"Time":[35],"alignment":[36],"phoneme":[37],"recognition":[38],"is":[39,111],"used":[40],"to":[41,46,71],"generate":[42],"such":[43],"and":[45,51,69,91,120,134],"model":[47,104,127],"variations":[49,88],"explicitly":[50],"accurately.":[52],"Dynamic":[53],"scheme":[57],"builds":[58],"dynamical":[60],"observation":[61],"density":[62],"each":[64],"specified":[65],"frame":[66],"decoding,":[68],"leads":[70],"use":[72],"component":[75],"efficiently.":[76],"This":[77],"method":[78],"increases":[79],"the":[80,93,103],"covering":[81],"ability":[82],"diversity":[85],"of":[86,108],"multi-accent,":[90],"alleviates":[92],"performance":[94],"degradation":[95],"caused":[96],"by":[97,131],"pruned":[98],"beam":[99],"search":[100],"without":[101,143],"augmenting":[102],"size.":[105],"The":[106],"effectiveness":[107],"this":[109],"approach":[110,123,129],"evaluated":[112],"on":[113,137,145],"three":[114],"typical":[115],"Chuan,":[118],"Yue":[119],"Wu.":[121],"Our":[122],"outperforms":[124],"traditional":[125],"acoustic":[126],"reconstruction":[128],"significantly":[130],"6.30%,":[132],"4.93%":[133],"5.53%,":[135],"respectively":[136],"Syllable":[138],"Error":[139],"Rate":[140],"(SER)":[141],"reduction,":[142],"degrading":[144],"standard":[146],"speech.":[147]},"counts_by_year":[{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
