{"id":"https://openalex.org/W3027336928","doi":"https://doi.org/10.1145/3388818.3389159","title":"Improved BLSTM RNN Based Accent Speech Recognition Using Multi-task Learning and Accent Embeddings","display_name":"Improved BLSTM RNN Based Accent Speech Recognition Using Multi-task Learning and Accent Embeddings","publication_year":2020,"publication_date":"2020-03-20","ids":{"openalex":"https://openalex.org/W3027336928","doi":"https://doi.org/10.1145/3388818.3389159","mag":"3027336928"},"language":"en","primary_location":{"id":"doi:10.1145/3388818.3389159","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3388818.3389159","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 2nd International Conference on Image, Video and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076793177","display_name":"Wenbi Rao","orcid":"https://orcid.org/0000-0003-1465-1544"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbi Rao","raw_affiliation_strings":["School of Computer and Technology, Wuhan University of Technology, Hubei Key Laboratory of Transportation Internet of Things, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Technology, Wuhan University of Technology, Hubei Key Laboratory of Transportation Internet of Things, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100705326","display_name":"Ji Zhang","orcid":"https://orcid.org/0000-0001-7167-6970"},"institutions":[{"id":"https://openalex.org/I196699116","display_name":"Wuhan University of Technology","ror":"https://ror.org/03fe7t173","country_code":"CN","type":"education","lineage":["https://openalex.org/I196699116"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ji Zhang","raw_affiliation_strings":["School of Computer and Technology, Wuhan University of Technology, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer and Technology, Wuhan University of Technology, China","institution_ids":["https://openalex.org/I196699116"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101867377","display_name":"Jianwei Wu","orcid":"https://orcid.org/0000-0001-5049-8327"},"institutions":[{"id":"https://openalex.org/I4210155232","display_name":"Fiberhome Technology Group (China)","ror":"https://ror.org/04yv20134","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155232"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwei Wu","raw_affiliation_strings":["Wuhan FiberHome Digital Technology Co., Ltd, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Wuhan FiberHome Digital Technology Co., Ltd, China","institution_ids":["https://openalex.org/I4210155232"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5416,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.73028929,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.8344943523406982},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8026981949806213},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7598842978477478},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.5893930196762085},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5808011293411255},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5732036828994751},{"id":"https://openalex.org/keywords/pitch-accent","display_name":"Pitch accent","score":0.5442276000976562},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5419538617134094},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5252489447593689},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4466167986392975},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.13380268216133118},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.09111997485160828}],"concepts":[{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.8344943523406982},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8026981949806213},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7598842978477478},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.5893930196762085},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5808011293411255},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5732036828994751},{"id":"https://openalex.org/C2777672088","wikidata":"https://www.wikidata.org/wiki/Q1441804","display_name":"Pitch accent","level":3,"score":0.5442276000976562},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5419538617134094},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5252489447593689},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4466167986392975},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13380268216133118},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.09111997485160828},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3388818.3389159","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3388818.3389159","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 2nd International Conference on Image, Video and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4300000071525574,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1489125746","https://openalex.org/W1524333225","https://openalex.org/W1710082047","https://openalex.org/W1922655562","https://openalex.org/W1979211281","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2160815625","https://openalex.org/W2169883442","https://openalex.org/W2293009711","https://openalex.org/W2293634267","https://openalex.org/W2293858598","https://openalex.org/W2405866807","https://openalex.org/W2519224033","https://openalex.org/W2530876040","https://openalex.org/W2557283755","https://openalex.org/W2587080466","https://openalex.org/W2602765883","https://openalex.org/W2603679025","https://openalex.org/W2608712415","https://openalex.org/W2750499125","https://openalex.org/W2886411136","https://openalex.org/W2889488531","https://openalex.org/W2889494795","https://openalex.org/W2911985572","https://openalex.org/W2915722758","https://openalex.org/W2916018751","https://openalex.org/W2963211739","https://openalex.org/W2963266252","https://openalex.org/W2963920996","https://openalex.org/W6631362777","https://openalex.org/W6730267373"],"related_works":["https://openalex.org/W1539047115","https://openalex.org/W2160212752","https://openalex.org/W2143860603","https://openalex.org/W1664345252","https://openalex.org/W2141731574","https://openalex.org/W2139400702","https://openalex.org/W2113890879","https://openalex.org/W2113419766","https://openalex.org/W2351067521","https://openalex.org/W1991538182"],"abstract_inverted_index":{"A":[0],"major":[1],"challenge":[2],"in":[3,86,127],"Automatic":[4],"speech":[5,74,81],"recognition":[6],"(ASR)":[7],"systems":[8,24],"for":[9],"Mandarin":[10],"is":[11],"to":[12,15,33,49,71],"be":[13],"able":[14],"handle":[16],"speakers":[17],"with":[18,39,83,115],"different":[19],"kinds":[20],"of":[21,63,89],"accents.":[22],"ASR":[23],"that":[25,54,102,119],"are":[26],"trained":[27],"using":[28],"single-task":[29],"learning":[30,59,100],"underperform":[31],"due":[32],"poor":[34],"generalization":[35],"ability":[36],"when":[37],"confronted":[38],"a":[40,93,110,122,132],"new":[41],"accent.":[42],"In":[43],"this":[44],"paper,":[45],"we":[46,77,97,103,120],"explore":[47],"how":[48],"use":[50],"accent":[51,55,73,84,107],"sentences":[52],"information":[53,85],"embeddings":[56,90],"and":[57,109],"multi-task":[58,99],"on":[60],"the":[61,64,80,87],"basis":[62],"bidirectional":[65],"long":[66],"short":[67],"term":[68],"memory":[69],"(BLSTM)":[70],"improve":[72],"recognition.":[75],"Firstly":[76],"consider":[78],"augmenting":[79],"input":[82],"form":[88],"extracted":[91],"by":[92],"standalone":[94],"network.":[95],"Then":[96],"propose":[98],"architecture":[101],"jointly":[104],"learn":[105],"an":[106],"classifier":[108],"multi-accent":[111,133],"acoustic":[112],"model.":[113],"Experiments":[114],"these":[116],"methods":[117],"demonstrate":[118],"obtain":[121],"4%":[123],"average":[124],"relative":[125],"improvement":[126],"word":[128],"error":[129],"rate":[130],"over":[131],"baseline":[134],"system.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
