{"id":"https://openalex.org/W4210341744","doi":"https://doi.org/10.1109/asru51503.2021.9688310","title":"Improving Speech Recognition on Noisy Speech via Speech Enhancement with Multi-Discriminators CycleGAN","display_name":"Improving Speech Recognition on Noisy Speech via Speech Enhancement with Multi-Discriminators CycleGAN","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210341744","doi":"https://doi.org/10.1109/asru51503.2021.9688310"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688310","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688310","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102495766","display_name":"Chia-Yu Li","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Chia-Yu Li","raw_affiliation_strings":["Institute of Natural Language Processing, University of Stuttgart,Germany","Institute of Natural Language Processing, University of Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Natural Language Processing, University of Stuttgart,Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"Institute of Natural Language Processing, University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020700841","display_name":"Ngoc Thang Vu","orcid":"https://orcid.org/0000-0001-7893-9147"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ngoc Thang Vu","raw_affiliation_strings":["Institute of Natural Language Processing, University of Stuttgart,Germany","Institute of Natural Language Processing, University of Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Natural Language Processing, University of Stuttgart,Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"Institute of Natural Language Processing, University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102495766"],"corresponding_institution_ids":["https://openalex.org/I100066346"],"apc_list":null,"apc_paid":null,"fwci":1.2042,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.86501007,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"830","last_page":"836"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8362928628921509},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7909705638885498},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7493203282356262},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6209547519683838},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.580754280090332},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5650264024734497},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5514654517173767},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.5207740664482117},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4731658101081848},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44560328125953674},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3672044575214386},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.16169461607933044},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.09579136967658997}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8362928628921509},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7909705638885498},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7493203282356262},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6209547519683838},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.580754280090332},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5650264024734497},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5514654517173767},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.5207740664482117},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4731658101081848},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44560328125953674},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3672044575214386},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.16169461607933044},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.09579136967658997},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688310","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688310","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1731081199","https://openalex.org/W1897240248","https://openalex.org/W1973681148","https://openalex.org/W2044893557","https://openalex.org/W2069681747","https://openalex.org/W2094461119","https://openalex.org/W2141411743","https://openalex.org/W2194775991","https://openalex.org/W2290318471","https://openalex.org/W2402146185","https://openalex.org/W2405774341","https://openalex.org/W2749588430","https://openalex.org/W2888858245","https://openalex.org/W2889500840","https://openalex.org/W2949399848","https://openalex.org/W2962793481","https://openalex.org/W2963057973","https://openalex.org/W2963727906","https://openalex.org/W2973220283","https://openalex.org/W4253928870","https://openalex.org/W4320013936","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6637618735","https://openalex.org/W6639532686","https://openalex.org/W6662018355","https://openalex.org/W6668037159","https://openalex.org/W6674259212","https://openalex.org/W6729881831"],"related_works":["https://openalex.org/W2120771489","https://openalex.org/W2294333436","https://openalex.org/W2653598178","https://openalex.org/W2373767407","https://openalex.org/W3110551121","https://openalex.org/W2131486661","https://openalex.org/W2089240210","https://openalex.org/W642007152","https://openalex.org/W2072884270","https://openalex.org/W4200596008"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"our":[3,92],"latest":[4],"investigations":[5],"on":[6,72,85,94,107,116],"improving":[7],"automatic":[8,35],"speech":[9,13,15,30,36,47],"recognition":[10,37],"for":[11,46],"noisy":[12],"via":[14],"enhancement.":[16],"We":[17,90],"propose":[18],"a":[19],"novel":[20],"method":[21,41,93],"named":[22],"Multi-discriminators":[23],"CycleGAN":[24,44],"to":[25,101,113],"reduce":[26],"noise":[27],"of":[28,75],"input":[29],"and":[31,53,98,111],"therefore":[32],"improve":[33,54],"the":[34,43,76,87,108,117],"performance.":[38],"Our":[39],"proposed":[40],"leverages":[42],"framework":[45],"enhancement":[48],"without":[49],"any":[50],"parallel":[51],"data":[52,78,96],"it":[55],"by":[56],"introducing":[57],"multiple":[58,70],"discriminators":[59],"that":[60,68],"check":[61],"different":[62],"frequency":[63],"areas.":[64],"Furthermore,":[65],"we":[66],"show":[67],"training":[69,77,82,88],"generators":[71],"homogeneous":[73],"subset":[74],"is":[79],"better":[80],"than":[81],"one":[83],"generator":[84],"all":[86],"data.":[89],"evaluate":[91],"CHiME-3":[95],"set":[97,110],"observe":[99],"up":[100,112],"10.03":[102],"%":[103,115],"relatively":[104],"WER":[105],"improvement":[106],"development":[109],"14.09":[114],"evaluation":[118],"set.":[119]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
