{"id":"https://openalex.org/W3206645560","doi":"https://doi.org/10.1109/taslp.2022.3172632","title":"Towards Robust Waveform-Based Acoustic Models","display_name":"Towards Robust Waveform-Based Acoustic Models","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W3206645560","doi":"https://doi.org/10.1109/taslp.2022.3172632","mag":"3206645560"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2022.3172632","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3172632","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2110.08634","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061934775","display_name":"Dino Ogli\u0107","orcid":"https://orcid.org/0000-0002-4728-9644"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]},{"id":"https://openalex.org/I4210109524","display_name":"AstraZeneca (Australia)","ror":"https://ror.org/018nw3344","country_code":"AU","type":"company","lineage":["https://openalex.org/I105036370","https://openalex.org/I4210109524"]}],"countries":["AU","GB"],"is_corresponding":false,"raw_author_name":"Dino Oglic","raw_affiliation_strings":["Applied Analytics and AI, Data Sciences and AI, BioPharmaceuticals R&amp;D, AstraZeneca, Cambridge, U.K","Department of Engineering, King's College London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0002-4728-9644","affiliations":[{"raw_affiliation_string":"Applied Analytics and AI, Data Sciences and AI, BioPharmaceuticals R&amp;D, AstraZeneca, Cambridge, U.K","institution_ids":["https://openalex.org/I4210109524"]},{"raw_affiliation_string":"Department of Engineering, King's College London, London, U.K","institution_ids":["https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056805951","display_name":"Zoran Cvetkovi\u0107","orcid":"https://orcid.org/0000-0002-5128-5099"},"institutions":[{"id":"https://openalex.org/I4210119896","display_name":"King's College School","ror":"https://ror.org/02bbqcn27","country_code":"GB","type":"education","lineage":["https://openalex.org/I4210119896"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zoran Cvetkovic","raw_affiliation_strings":["Department of Engineering, King&#x0027;s College London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0002-5128-5099","affiliations":[{"raw_affiliation_string":"Department of Engineering, King&#x0027;s College London, London, U.K","institution_ids":["https://openalex.org/I4210119896"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069770227","display_name":"Peter Sollich","orcid":"https://orcid.org/0000-0003-0169-7893"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]},{"id":"https://openalex.org/I4210119896","display_name":"King's College School","ror":"https://ror.org/02bbqcn27","country_code":"GB","type":"education","lineage":["https://openalex.org/I4210119896"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Peter Sollich","raw_affiliation_strings":["Department of Mathematics, King&#x0027;s College London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0003-0169-7893","affiliations":[{"raw_affiliation_string":"Department of Mathematics, King&#x0027;s College London, London, U.K","institution_ids":["https://openalex.org/I4210119896","https://openalex.org/I183935753"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027442277","display_name":"Steve Renals","orcid":"https://orcid.org/0000-0002-8790-3389"},"institutions":[{"id":"https://openalex.org/I98677209","display_name":"University of Edinburgh","ror":"https://ror.org/01nrxwf90","country_code":"GB","type":"education","lineage":["https://openalex.org/I98677209"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Steve Renals","raw_affiliation_strings":["Center for Speech Technology Research, University of Edinburgh, Edinburgh, U.K"],"raw_orcid":"https://orcid.org/0000-0002-8790-3389","affiliations":[{"raw_affiliation_string":"Center for Speech Technology Research, University of Edinburgh, Edinburgh, U.K","institution_ids":["https://openalex.org/I98677209"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015318204","display_name":"Bin Yu","orcid":"https://orcid.org/0000-0002-7461-625X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bin Yu","raw_affiliation_strings":["Departments of Statistics and Electrical Engineering and Computer Sciences, UC Berkeley, Berkeley, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Departments of Statistics and Electrical Engineering and Computer Sciences, UC Berkeley, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.2775,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.60246795,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"30","issue":null,"first_page":"1977","last_page":"1992"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.6911110877990723},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.43173205852508545},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.32361072301864624},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.11877095699310303}],"concepts":[{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.6911110877990723},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.43173205852508545},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32361072301864624},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.11877095699310303},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2022.3172632","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2022.3172632","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2110.08634","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2110.08634","pdf_url":"https://arxiv.org/pdf/2110.08634","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:publications.goettingen-research-online.de:2/135672","is_oa":true,"landing_page_url":"https://resolver.sub.uni-goettingen.de/purl?gro-2/135672","pdf_url":null,"source":{"id":"https://openalex.org/S4306401634","display_name":"GoeScholar  The Publication Server of the Georg-August-Universit\u00e4t G\u00f6ttingen (Georg-August-Universit\u00e4t G\u00f6ttingen)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210122495","host_organization_name":"Asklepios Klinik St. Georg","host_organization_lineage":["https://openalex.org/I4210122495"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2110.08634","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2110.08634","pdf_url":"https://arxiv.org/pdf/2110.08634","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7599999904632568,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4304376995","display_name":"SpeechWave","funder_award_id":"EP/R012067/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6167047368","display_name":null,"funder_award_id":"EP/R012067/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W1489048302","https://openalex.org/W1524333225","https://openalex.org/W1533861849","https://openalex.org/W1568183767","https://openalex.org/W1922655562","https://openalex.org/W1973669708","https://openalex.org/W1979651826","https://openalex.org/W1992475611","https://openalex.org/W2032558547","https://openalex.org/W2085267254","https://openalex.org/W2099621636","https://openalex.org/W2114973313","https://openalex.org/W2116217121","https://openalex.org/W2118020555","https://openalex.org/W2131548063","https://openalex.org/W2133815075","https://openalex.org/W2136439176","https://openalex.org/W2137295153","https://openalex.org/W2139698650","https://openalex.org/W2144068644","https://openalex.org/W2144908784","https://openalex.org/W2151239833","https://openalex.org/W2153773386","https://openalex.org/W2184343439","https://openalex.org/W2398826216","https://openalex.org/W2407080277","https://openalex.org/W2515753980","https://openalex.org/W2559260703","https://openalex.org/W2696967604","https://openalex.org/W2763188033","https://openalex.org/W2789942385","https://openalex.org/W2796892552","https://openalex.org/W2903799412","https://openalex.org/W2911634294","https://openalex.org/W2936774411","https://openalex.org/W2950048339","https://openalex.org/W2951735139","https://openalex.org/W2963403868","https://openalex.org/W2964012862","https://openalex.org/W2964052309","https://openalex.org/W2964138484","https://openalex.org/W2971109239","https://openalex.org/W2973049979","https://openalex.org/W2973053574","https://openalex.org/W2982427813","https://openalex.org/W2982456909","https://openalex.org/W2999905431","https://openalex.org/W3016042429","https://openalex.org/W3024464021","https://openalex.org/W3036601975","https://openalex.org/W3094855746","https://openalex.org/W3095947083","https://openalex.org/W3097919147","https://openalex.org/W3099782249","https://openalex.org/W3107298252","https://openalex.org/W3123517045","https://openalex.org/W3142067363","https://openalex.org/W3191151211","https://openalex.org/W3195185666","https://openalex.org/W4233141322","https://openalex.org/W4249820770","https://openalex.org/W4385245566","https://openalex.org/W4394666973","https://openalex.org/W6631362777","https://openalex.org/W6631943919","https://openalex.org/W6640090968","https://openalex.org/W6675409298","https://openalex.org/W6681599957","https://openalex.org/W6682208247","https://openalex.org/W6739901393","https://openalex.org/W6748645487","https://openalex.org/W6758684365","https://openalex.org/W6764522968","https://openalex.org/W6769802879","https://openalex.org/W6780218876","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W1974895211","https://openalex.org/W2176409448","https://openalex.org/W2129841057","https://openalex.org/W3040712279","https://openalex.org/W2364769705","https://openalex.org/W2056136368","https://openalex.org/W2374664672","https://openalex.org/W4367555392"],"abstract_inverted_index":{"We":[0,126],"study":[1],"the":[2,29,66,71,75,82,87,90,123,129,181,189,208,214,230],"problem":[3,23],"of":[4,25,31,52,81,89,110,143,234],"learning":[5,124],"robust":[6,119],"acoustic":[7,147,231],"models":[8,221],"in":[9,40,86,146,201],"adverse":[10],"environments,":[11],"characterized":[12],"by":[13,64],"a":[14,108,224],"significant":[15],"mismatch":[16],"between":[17],"training":[18,63,91,102,206,225],"and":[19,112,173],"test":[20,235],"conditions.":[21],"This":[22],"is":[24],"paramount":[26],"importance":[27],"for":[28],"deployment":[30],"speech":[32],"recognition":[33],"systems":[34],"that":[35,69,97,115,188],"need":[36],"to":[37,139,157,193,205,220,228],"perform":[38],"well":[39],"unseen":[41,194],"environments.":[42],"First,":[43],"we":[44,95,178],"characterize":[45],"data":[46,135],"augmentation":[47,136],"theoretically":[48,114],"as":[49],"an":[50,79],"instance":[51],"vicinal":[53],"risk":[54,60,210],"minimization,":[55],"which":[56,160],"aims":[57],"at":[58,101],"improving":[59],"estimates":[61],"during":[62],"replacing":[65],"delta":[67],"functions":[68],"define":[70],"empirical":[72,185],"density":[73,85],"over":[74],"input":[76],"space":[77],"with":[78,164,197],"approximation":[80],"marginal":[83],"population":[84],"vicinity":[88],"samples.":[92],"More":[93],"specifically,":[94],"assume":[96],"local":[98],"neighborhoods":[99],"centered":[100],"samples":[103],"can":[104,117,191],"be":[105],"approximated":[106],"using":[107,207,223],"mixture":[109,131],"Gaussians,":[111],"demonstrate":[113,216],"this":[116],"incorporate":[118],"inductive":[120],"bias":[121],"into":[122],"process.":[125],"then":[127],"specify":[128],"individual":[130],"components":[132],"implicitly":[133],"via":[134],"schemes,":[137],"designed":[138,227],"address":[140],"common":[141],"sources":[142],"spurious":[144],"correlations":[145],"models.":[148],"To":[149],"avoid":[150],"potential":[151],"confounding":[152],"effects":[153],"on":[154,180],"robustness":[155],"due":[156],"information":[158],"loss,":[159],"has":[161],"been":[162],"associated":[163],"standard":[165,209],"feature":[166],"extraction":[167],"techniques":[168],"(e.g.,":[169],"<sc":[170,174],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[171,175],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">fbank</small>":[172],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">mfcc</small>":[176],"features),":[177],"focus":[179],"waveform-based":[182],"setting.":[183],"Our":[184],"results":[186,215],"show":[187],"approach":[190],"generalize":[192],"noise":[195],"conditions,":[196],"150%":[198],"relative":[199,219],"improvement":[200],"out-of-distribution":[202],"generalization":[203],"compared":[204],"minimization":[211],"principle.":[212],"Moreover,":[213],"competitive":[217],"performance":[218],"learned":[222],"sample":[226],"match":[229],"conditions":[232],"characteristic":[233],"utterances.":[236]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
