{"id":"https://openalex.org/W2549762342","doi":"https://doi.org/10.1109/mlsp.2016.7738854","title":"Improving speech recognition using limited accent diverse British English training data with deep neural networks","display_name":"Improving speech recognition using limited accent diverse British English training data with deep neural networks","publication_year":2016,"publication_date":"2016-09-01","ids":{"openalex":"https://openalex.org/W2549762342","doi":"https://doi.org/10.1109/mlsp.2016.7738854","mag":"2549762342"},"language":"en","primary_location":{"id":"doi:10.1109/mlsp.2016.7738854","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp.2016.7738854","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 26th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112035956","display_name":"Maryam Najafian","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]},{"id":"https://openalex.org/I79619799","display_name":"University of Birmingham","ror":"https://ror.org/03angcq70","country_code":"GB","type":"education","lineage":["https://openalex.org/I79619799"]}],"countries":["GB","US"],"is_corresponding":true,"raw_author_name":"Maryam Najafian","raw_affiliation_strings":["Center for Robust Speech Systems, University of Texas at Dallas, Richardson, TX, USA","School of Electronic, Electrical & Systems Engineering, University of Birmingham, Birmingham, UK"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems, University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"School of Electronic, Electrical & Systems Engineering, University of Birmingham, Birmingham, UK","institution_ids":["https://openalex.org/I79619799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086899221","display_name":"Saeid Safavi","orcid":"https://orcid.org/0000-0002-3000-418X"},"institutions":[{"id":"https://openalex.org/I79619799","display_name":"University of Birmingham","ror":"https://ror.org/03angcq70","country_code":"GB","type":"education","lineage":["https://openalex.org/I79619799"]},{"id":"https://openalex.org/I141584323","display_name":"University of Hertfordshire","ror":"https://ror.org/0267vjk41","country_code":"GB","type":"education","lineage":["https://openalex.org/I141584323"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Saeid Safavi","raw_affiliation_strings":["School of Electronic, Electrical & Systems Engineering, University of Birmingham, Birmingham, UK","School of Engineering & Technology, University of Hertfordshire, Hatfield, UK"],"affiliations":[{"raw_affiliation_string":"School of Electronic, Electrical & Systems Engineering, University of Birmingham, Birmingham, UK","institution_ids":["https://openalex.org/I79619799"]},{"raw_affiliation_string":"School of Engineering & Technology, University of Hertfordshire, Hatfield, UK","institution_ids":["https://openalex.org/I141584323"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057910370","display_name":"John H. L. Hansen","orcid":"https://orcid.org/0000-0003-1382-9929"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John H. L. Hansen","raw_affiliation_strings":["Center for Robust Speech Systems, University of Texas at Dallas, Richardson, TX, USA"],"affiliations":[{"raw_affiliation_string":"Center for Robust Speech Systems, University of Texas at Dallas, Richardson, TX, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079477816","display_name":"Martin Russell","orcid":"https://orcid.org/0000-0002-8324-4961"},"institutions":[{"id":"https://openalex.org/I79619799","display_name":"University of Birmingham","ror":"https://ror.org/03angcq70","country_code":"GB","type":"education","lineage":["https://openalex.org/I79619799"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Martin Russell","raw_affiliation_strings":["School of Electronic, Electrical & Systems Engineering, University of Birmingham, Birmingham, UK"],"affiliations":[{"raw_affiliation_string":"School of Electronic, Electrical & Systems Engineering, University of Birmingham, Birmingham, UK","institution_ids":["https://openalex.org/I79619799"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112035956"],"corresponding_institution_ids":["https://openalex.org/I162577319","https://openalex.org/I79619799"],"apc_list":null,"apc_paid":null,"fwci":3.4273,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.9376969,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.8476462364196777},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7049589157104492},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6649031043052673},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.6093870401382446},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5189116597175598},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.49097612500190735},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.4812021851539612},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4795115292072296},{"id":"https://openalex.org/keywords/pitch-accent","display_name":"Pitch accent","score":0.4645402729511261},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.43664786219596863},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3798682689666748}],"concepts":[{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.8476462364196777},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7049589157104492},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6649031043052673},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.6093870401382446},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5189116597175598},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.49097612500190735},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.4812021851539612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4795115292072296},{"id":"https://openalex.org/C2777672088","wikidata":"https://www.wikidata.org/wiki/Q1441804","display_name":"Pitch accent","level":3,"score":0.4645402729511261},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.43664786219596863},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3798682689666748},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mlsp.2016.7738854","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mlsp.2016.7738854","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 26th International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.800000011920929,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320273","display_name":"University of Cambridge","ror":"https://ror.org/013meh722"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W5524598","https://openalex.org/W135246240","https://openalex.org/W143647774","https://openalex.org/W189596042","https://openalex.org/W1581166685","https://openalex.org/W1969685872","https://openalex.org/W1971734011","https://openalex.org/W1992155534","https://openalex.org/W2001285277","https://openalex.org/W2003573652","https://openalex.org/W2015633636","https://openalex.org/W2018887914","https://openalex.org/W2025929295","https://openalex.org/W2079623482","https://openalex.org/W2103962059","https://openalex.org/W2116064496","https://openalex.org/W2130722890","https://openalex.org/W2132211083","https://openalex.org/W2138857742","https://openalex.org/W2144792281","https://openalex.org/W2146591867","https://openalex.org/W2147768505","https://openalex.org/W2147986626","https://openalex.org/W2150769028","https://openalex.org/W2154252957","https://openalex.org/W2160815625","https://openalex.org/W2183209506","https://openalex.org/W2266728862","https://openalex.org/W2294108103","https://openalex.org/W2294831733","https://openalex.org/W2294933947","https://openalex.org/W2394932179","https://openalex.org/W2402145928","https://openalex.org/W2403894853","https://openalex.org/W2405866807","https://openalex.org/W2406395231","https://openalex.org/W2406718879","https://openalex.org/W2485217158","https://openalex.org/W2505434962","https://openalex.org/W2510217521","https://openalex.org/W2515107847","https://openalex.org/W2525241666","https://openalex.org/W2606321545","https://openalex.org/W6607775107","https://openalex.org/W6650851154","https://openalex.org/W6680300913","https://openalex.org/W6681529571","https://openalex.org/W6693313066","https://openalex.org/W6697043880","https://openalex.org/W6697306262","https://openalex.org/W6713242239","https://openalex.org/W6713969595","https://openalex.org/W6722592384","https://openalex.org/W6727295278"],"related_works":["https://openalex.org/W2088008556","https://openalex.org/W4360877803","https://openalex.org/W2334135487","https://openalex.org/W4298046075","https://openalex.org/W4207066001","https://openalex.org/W2381837697","https://openalex.org/W4251666207","https://openalex.org/W2087397317","https://openalex.org/W2350046259","https://openalex.org/W2089671023"],"abstract_inverted_index":{"Despite":[0],"the":[1,45,85,96,115,135,144,149,152,155,162,177,189,199,231],"recent":[2],"advances":[3],"in":[4,43,148,213],"acoustic":[5,120],"modelling":[6,8,121],"tasks":[7],"speech":[9],"data":[10,160,197,223],"coming":[11],"from":[12,198,226],"different":[13],"speakers":[14],"with":[15,130,192,240,253],"varying":[16],"accents,":[17],"age,":[18],"and":[19,110,154],"speaking":[20],"styles":[21],"is":[22,41,90,174],"a":[23,53,59,72,79,140,167,193,210,217],"fundamental":[24],"challenge":[25],"for":[26,65,78,249],"Deep":[27],"Neural":[28],"Networks":[29],"(DNNs)":[30],"based":[31,68,119,126,238],"Automatic":[32],"Speech":[33],"Recognition":[34],"(ASR).":[35],"A":[36],"relative":[37,86],"gain":[38,170,212],"of":[39,47,95,117,146,157,171,196,205,220,243],"46.85%":[40],"achieved":[42,175],"recognising":[44],"Accents":[46],"British":[48],"Isles":[49],"corpus":[50],"by":[51],"applying":[52],"baseline":[54],"DNN":[55,67,118,237],"model":[56],"rather":[57],"than":[58,93],"Gaussian":[60],"mixture":[61],"model.":[62],"However,":[63],"even":[64],"powerful":[66],"systems":[69],"accents":[70],"remain":[71],"challenge.":[73],"Our":[74,180],"study":[75,142],"shows":[76],"that":[77,94,183],"`difficult'":[80,201],"accent":[81,127,150,186,202,221,228],"such":[82],"as":[83,215],"Glaswegian":[84,206],"word":[87],"error":[88],"rate":[89],"78.9%":[91],"higher":[92],"standard":[97],"southern":[98],"English":[99],"accent.":[100],"In":[101],"this":[102],"work":[103],"we":[104],"propose":[105],"four":[106],"multi-accent":[107],"learning":[108],"strategies,":[109],"evaluate":[111],"their":[112],"effectiveness":[113],"within":[114],"context":[116],"framework.":[122],"Using":[123],"an":[124],"i-vector":[125],"identification":[128],"system":[129],"78%":[131],"accuracy":[132],"to":[133,209],"label":[134],"training":[136,159,190,250],"data.":[137],"We":[138],"present":[139],"novel":[141],"on":[143,161,236],"effect":[145],"increase":[147],"diversity,":[151],"`difficulty'":[153],"amount":[156,195,219,242],"supplemented":[158],"ASR":[163,169],"performance.":[164],"On":[165],"average":[166],"further":[168],"27.24":[172],"%":[173],"using":[176,216],"proposed":[178],"strategies.":[179],"results":[181],"show":[182],"across":[184],"all":[185,251],"regions":[187],"supplementing":[188],"set":[191],"small":[194],"most":[200],"(2.25":[203],"hours":[204,225],"accent)":[207],"leads":[208],"similar":[211],"performance":[214],"large":[218],"diverse":[222],"(8.96":[224],"14":[227],"regions).":[229],"Although":[230],"ideas":[232],"presented":[233],"are":[234,247],"focused":[235],"analysis":[239],"limited":[241,255],"multi-accented":[244],"data,":[245],"they":[246],"applicable":[248],"classifiers":[252],"multi-conditional":[254],"resources.":[256]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
