{"id":"https://openalex.org/W3026870722","doi":"https://doi.org/10.1109/icarsc49921.2020.9096166","title":"Overcoming Data Scarcity in Speaker Identification: Dataset Augmentation with Synthetic MFCCs via Character-level RNN","display_name":"Overcoming Data Scarcity in Speaker Identification: Dataset Augmentation with Synthetic MFCCs via Character-level RNN","publication_year":2020,"publication_date":"2020-04-01","ids":{"openalex":"https://openalex.org/W3026870722","doi":"https://doi.org/10.1109/icarsc49921.2020.9096166","mag":"3026870722"},"language":"en","primary_location":{"id":"doi:10.1109/icarsc49921.2020.9096166","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icarsc49921.2020.9096166","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Autonomous Robot Systems and Competitions (ICARSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080362193","display_name":"Jordan J. Bird","orcid":"https://orcid.org/0000-0002-9858-1231"},"institutions":[{"id":"https://openalex.org/I169199633","display_name":"Aston University","ror":"https://ror.org/05j0ve876","country_code":"GB","type":"education","lineage":["https://openalex.org/I169199633"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jordan J. Bird","raw_affiliation_strings":["ARVIS Lab, Aston University, Birmingham, UK"],"affiliations":[{"raw_affiliation_string":"ARVIS Lab, Aston University, Birmingham, UK","institution_ids":["https://openalex.org/I169199633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032454100","display_name":"Diego R. Faria","orcid":"https://orcid.org/0000-0002-2771-1713"},"institutions":[{"id":"https://openalex.org/I169199633","display_name":"Aston University","ror":"https://ror.org/05j0ve876","country_code":"GB","type":"education","lineage":["https://openalex.org/I169199633"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Diego R. Faria","raw_affiliation_strings":["ARVIS Lab, Aston University, Birmingham, UK"],"affiliations":[{"raw_affiliation_string":"ARVIS Lab, Aston University, Birmingham, UK","institution_ids":["https://openalex.org/I169199633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039508418","display_name":"Cristiano Premebida","orcid":"https://orcid.org/0000-0002-2168-2077"},"institutions":[{"id":"https://openalex.org/I4210125590","display_name":"Institute for Systems Engineering and Computers","ror":"https://ror.org/033wn8m60","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Cristiano Premebida","raw_affiliation_strings":["Institute of Systems and Robotics, University of Coimbra, Coimbra, Portugal"],"affiliations":[{"raw_affiliation_string":"Institute of Systems and Robotics, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I4210125590","https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076983719","display_name":"Anik\u00f3 Ek\u00e1rt","orcid":"https://orcid.org/0000-0001-6967-5397"},"institutions":[{"id":"https://openalex.org/I169199633","display_name":"Aston University","ror":"https://ror.org/05j0ve876","country_code":"GB","type":"education","lineage":["https://openalex.org/I169199633"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Aniko Ekart","raw_affiliation_strings":["School of Engineering and Applied Science, Aston University, Birmingham, UK"],"affiliations":[{"raw_affiliation_string":"School of Engineering and Applied Science, Aston University, Birmingham, UK","institution_ids":["https://openalex.org/I169199633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044467131","display_name":"Pedro P. S. Ayrosa","orcid":null},"institutions":[{"id":"https://openalex.org/I127110123","display_name":"Universidade Estadual de Londrina","ror":"https://ror.org/01585b035","country_code":"BR","type":"education","lineage":["https://openalex.org/I127110123"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Pedro P. S. Ayrosa","raw_affiliation_strings":["Department of Computer Science, Universidade Estadual de Londrina, Londrina, Brazil"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Universidade Estadual de Londrina, Londrina, Brazil","institution_ids":["https://openalex.org/I127110123"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080362193"],"corresponding_institution_ids":["https://openalex.org/I169199633"],"apc_list":null,"apc_paid":null,"fwci":0.6628,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":{"value":0.75276289,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"146","last_page":"151"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8190609216690063},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6225319504737854},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.6072754263877869},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5187128186225891},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.49346888065338135},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4697105288505554},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.464457243680954},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.447915643453598},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.44596540927886963},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.37249720096588135},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.33966419100761414}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8190609216690063},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6225319504737854},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.6072754263877869},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5187128186225891},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.49346888065338135},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4697105288505554},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.464457243680954},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.447915643453598},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.44596540927886963},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.37249720096588135},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.33966419100761414},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icarsc49921.2020.9096166","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icarsc49921.2020.9096166","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Autonomous Robot Systems and Competitions (ICARSC)","raw_type":"proceedings-article"},{"id":"pmh:oai:irep.ntu.ac.uk:48136","is_oa":false,"landing_page_url":"http://irep.ntu.ac.uk/id/eprint/48136/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400559","display_name":"Nottingham Trent University's Institutional Repository (Nottingham Trent Repository)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I52590639","host_organization_name":"Nottingham Trent University","host_organization_lineage":["https://openalex.org/I52590639"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Book Section"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W12796654","https://openalex.org/W99344339","https://openalex.org/W128472000","https://openalex.org/W184864795","https://openalex.org/W196214544","https://openalex.org/W655565512","https://openalex.org/W917558981","https://openalex.org/W1522301498","https://openalex.org/W1553469512","https://openalex.org/W1637570796","https://openalex.org/W1810943226","https://openalex.org/W1850742715","https://openalex.org/W1981151789","https://openalex.org/W1989734195","https://openalex.org/W2021708492","https://openalex.org/W2062826588","https://openalex.org/W2103869314","https://openalex.org/W2137619888","https://openalex.org/W2140685304","https://openalex.org/W2236688737","https://openalex.org/W2293634267","https://openalex.org/W2495750388","https://openalex.org/W2603567530","https://openalex.org/W2726515241","https://openalex.org/W2747914378","https://openalex.org/W2752442988","https://openalex.org/W2760938034","https://openalex.org/W2767052532","https://openalex.org/W2807546609","https://openalex.org/W2807786846","https://openalex.org/W2884065486","https://openalex.org/W2889385246","https://openalex.org/W2912341770","https://openalex.org/W2959099235","https://openalex.org/W2962741254","https://openalex.org/W2962760690","https://openalex.org/W2962862718","https://openalex.org/W2963545917","https://openalex.org/W2963609956","https://openalex.org/W2963942586","https://openalex.org/W2964087600","https://openalex.org/W2964121744","https://openalex.org/W2981461916","https://openalex.org/W4210694145","https://openalex.org/W4231807801","https://openalex.org/W4255193866","https://openalex.org/W4288091954","https://openalex.org/W6600520985","https://openalex.org/W6607549202","https://openalex.org/W6607974698","https://openalex.org/W6624407840","https://openalex.org/W6631190155","https://openalex.org/W6636885848","https://openalex.org/W6638273328","https://openalex.org/W6639118987","https://openalex.org/W6680559069","https://openalex.org/W6689876912","https://openalex.org/W6759258252","https://openalex.org/W6769178842"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W1999004162","https://openalex.org/W2125642021","https://openalex.org/W1521049138","https://openalex.org/W2023466863","https://openalex.org/W2696990509"],"abstract_inverted_index":{"Autonomous":[0],"speaker":[1,17,54,99,117,146,221],"identification":[2],"suffers":[3],"issues":[4,263],"of":[5,16,52,75,88,105,140,243],"data":[6,34,62,100,128,193,207,212,252],"scarcity":[7],"due":[8,264],"to":[9,13,19,26,31,114,125,136,150,189,230,235,249,265],"it":[10],"being":[11],"unrealistic":[12],"gather":[14],"hours":[15],"audio":[18],"form":[20,104],"a":[21,66,72,86,108,215],"dataset,":[22],"which":[23],"inevitably":[24],"leads":[25],"class":[27,266],"imbalance":[28,267],"in":[29,102,112,148],"comparison":[30],"the":[32,50,58,89,95,103,116,119,130,137,144,159,178,185,190,220],"large":[33],"availability":[35],"from":[36,118,123,158,171,232,241,247],"non-speakers":[37],"since":[38],"large-scale":[39],"speech":[40,176,211],"datasets":[41,172],"are":[42,133,228,268],"available":[43],"online.":[44],"In":[45],"this":[46],"study,":[47],"we":[48],"explore":[49],"possibility":[51],"improving":[53],"recognition":[55],"by":[56,64,214],"augmenting":[57],"dataset":[59,91,139],"with":[60,174,201],"synthetic":[61,98,127,160,175,210,251],"produced":[63],"training":[65],"Character-level":[67],"Recurrent":[68],"Neural":[69],"Network":[70],"on":[71,85],"short":[73,205],"clip":[74],"five":[76,203],"spoken":[77,204],"sentences.":[78],"A":[79],"deep":[80],"neural":[81],"network":[82,131],"is":[83],"trained":[84],"selection":[87],"Flickr8k":[90,142],"as":[92,94,107],"well":[93],"real":[96,145,192],"and":[97,143,184,225,258],"(all":[101],"MFCCs)":[106],"binary":[109],"classification":[110,179,222],"problem":[111],"order":[113,149],"discern":[115,151],"Flickr":[120],"speakers.":[121],"Ranging":[122],"2,500":[124],"10,000":[126],"objects,":[129],"weights":[132],"then":[134],"transferred":[135],"original":[138],"only":[141],"data,":[147],"whether":[152],"useful":[153],"rules":[154],"can":[155,218],"be":[156],"learnt":[157],"data.":[161,196],"Results":[162],"for":[163,237],"all":[164],"three":[165,238],"subjects":[166,239],"show":[167,261],"that":[168,199,262],"fine-tune":[169],"learning":[170],"augmented":[173],"improve":[177,219,231],"accuracy,":[180],"F1":[181,255],"score,":[182],"precision,":[183],"recall":[186,259],"when":[187,245],"applied":[188],"scarce":[191],"vs":[194],"non-speaker":[195],"We":[197],"conclude":[198],"even":[200],"just":[202],"sentences,":[206],"augmentation":[208],"via":[209],"generated":[213],"Char-":[216],"RNN":[217],"process.":[223],"Accuracy":[224],"related":[226],"metrics":[227],"shown":[229],"around":[233],"93%":[234],"99%":[236],"classified":[240],"thousands":[242],"others":[244],"fine-tuning":[246],"exposure":[248],"2500-1000":[250],"points.":[253],"High":[254],"scores,":[256],"precision":[257],"also":[260,269],"solved.":[270]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
