{"id":"https://openalex.org/W2963045393","doi":"https://doi.org/10.1109/taslp.2018.2821903","title":"End-to-End Waveform Utterance Enhancement for Direct Evaluation Metrics Optimization by Fully Convolutional Neural Networks","display_name":"End-to-End Waveform Utterance Enhancement for Direct Evaluation Metrics Optimization by Fully Convolutional Neural Networks","publication_year":2018,"publication_date":"2018-04-06","ids":{"openalex":"https://openalex.org/W2963045393","doi":"https://doi.org/10.1109/taslp.2018.2821903","mag":"2963045393"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2018.2821903","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2821903","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/8361959/08331910.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ieeexplore.ieee.org/ielx7/6570655/8361959/08331910.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071471469","display_name":"Szu\u2010Wei Fu","orcid":"https://orcid.org/0000-0002-3487-8212"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Szu-Wei Fu","raw_affiliation_strings":["Department of Computer Science and Information Engineering, National Taiwan University, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Information Engineering, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060987431","display_name":"Tao-Wei Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Tao-Wei Wang","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044008055","display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I4210086894","display_name":"Research Center for Information Technology Innovation, Academia Sinica","ror":"https://ror.org/000zgvm20","country_code":"TW","type":"facility","lineage":["https://openalex.org/I4210086894","https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Research Center for Information Technology Innovation, Academia Sinica, Taipei, Taiwan","institution_ids":["https://openalex.org/I4210086894"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034792613","display_name":"Xugang Lu","orcid":"https://orcid.org/0000-0001-7075-448X"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Xugang Lu","raw_affiliation_strings":["National Institute of Information and Communications Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114514387","display_name":"Hisashi Kawai","orcid":"https://orcid.org/0000-0002-0914-5092"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hisashi Kawai","raw_affiliation_strings":["National Institute of Information and Communications Technology, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Tokyo, Japan","institution_ids":["https://openalex.org/I90023481"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5071471469"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":32.3377,"has_fulltext":true,"cited_by_count":339,"citation_normalized_percentile":{"value":0.99876423,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"26","issue":"9","first_page":"1570","last_page":"1584"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.7578330636024475},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7490753531455994},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6114650964736938},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5522555708885193},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5456174612045288},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5161153078079224},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4652700424194336},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.4476352632045746},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39556074142456055},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12834253907203674},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09844928979873657},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.08071175217628479},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08018207550048828}],"concepts":[{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.7578330636024475},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7490753531455994},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6114650964736938},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5522555708885193},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5456174612045288},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5161153078079224},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4652700424194336},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.4476352632045746},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39556074142456055},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12834253907203674},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09844928979873657},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.08071175217628479},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08018207550048828},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2018.2821903","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2821903","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/8361959/08331910.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/taslp.2018.2821903","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2018.2821903","pdf_url":"https://ieeexplore.ieee.org/ielx7/6570655/8361959/08331910.pdf","source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7900000214576721,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1224500915","display_name":null,"funder_award_id":"MOST 106-3114-E-011-004-","funder_id":"https://openalex.org/F4320322795","funder_display_name":"Ministry of Science and Technology, Taiwan"}],"funders":[{"id":"https://openalex.org/F4320322795","display_name":"Ministry of Science and Technology, Taiwan","ror":"https://ror.org/02kv4zf79"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2963045393.pdf","grobid_xml":"https://content.openalex.org/works/W2963045393.grobid-xml"},"referenced_works_count":106,"referenced_works":["https://openalex.org/W18715868","https://openalex.org/W1482149378","https://openalex.org/W1495679096","https://openalex.org/W1498717522","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W1790748249","https://openalex.org/W1836465849","https://openalex.org/W1897240248","https://openalex.org/W1903029394","https://openalex.org/W1963950237","https://openalex.org/W1974932989","https://openalex.org/W1980741455","https://openalex.org/W1982808783","https://openalex.org/W1989034586","https://openalex.org/W1989392497","https://openalex.org/W2044893557","https://openalex.org/W2046869671","https://openalex.org/W2064525251","https://openalex.org/W2066754815","https://openalex.org/W2069681747","https://openalex.org/W2070126272","https://openalex.org/W2078528584","https://openalex.org/W2084636655","https://openalex.org/W2095072097","https://openalex.org/W2101045344","https://openalex.org/W2103829944","https://openalex.org/W2121973264","https://openalex.org/W2139493862","https://openalex.org/W2141998673","https://openalex.org/W2147455188","https://openalex.org/W2148457072","https://openalex.org/W2163101527","https://openalex.org/W2169273265","https://openalex.org/W2221409856","https://openalex.org/W2290548146","https://openalex.org/W2291877678","https://openalex.org/W2341800275","https://openalex.org/W2384495648","https://openalex.org/W2397226255","https://openalex.org/W2402837605","https://openalex.org/W2403766732","https://openalex.org/W2405774341","https://openalex.org/W2406737436","https://openalex.org/W2478884216","https://openalex.org/W2516342150","https://openalex.org/W2516547830","https://openalex.org/W2517704359","https://openalex.org/W2517760955","https://openalex.org/W2527611302","https://openalex.org/W2550397165","https://openalex.org/W2560747691","https://openalex.org/W2594809597","https://openalex.org/W2605138598","https://openalex.org/W2605589342","https://openalex.org/W2619993508","https://openalex.org/W2625041691","https://openalex.org/W2626799581","https://openalex.org/W2649558613","https://openalex.org/W2696558042","https://openalex.org/W2734774145","https://openalex.org/W2746457594","https://openalex.org/W2747161606","https://openalex.org/W2755891984","https://openalex.org/W2765425905","https://openalex.org/W2766607545","https://openalex.org/W2771275309","https://openalex.org/W2774389566","https://openalex.org/W2785961982","https://openalex.org/W2914048585","https://openalex.org/W2949497149","https://openalex.org/W2951093759","https://openalex.org/W2952367144","https://openalex.org/W2954695182","https://openalex.org/W2962866211","https://openalex.org/W2963103134","https://openalex.org/W2963321191","https://openalex.org/W2963341071","https://openalex.org/W2963351212","https://openalex.org/W2963828919","https://openalex.org/W3124794156","https://openalex.org/W3147539069","https://openalex.org/W4253928870","https://openalex.org/W4254751698","https://openalex.org/W6600780989","https://openalex.org/W6631190155","https://openalex.org/W6638667902","https://openalex.org/W6639532686","https://openalex.org/W6704322773","https://openalex.org/W6710709672","https://openalex.org/W6712317276","https://openalex.org/W6712965539","https://openalex.org/W6713298849","https://openalex.org/W6713658392","https://openalex.org/W6726061053","https://openalex.org/W6726607834","https://openalex.org/W6735168207","https://openalex.org/W6740049204","https://openalex.org/W6742802039","https://openalex.org/W6743354777","https://openalex.org/W6743462201","https://openalex.org/W6744261651","https://openalex.org/W6745764667","https://openalex.org/W6745822563","https://openalex.org/W6746298117","https://openalex.org/W6746567100"],"related_works":["https://openalex.org/W2529301793","https://openalex.org/W2384121599","https://openalex.org/W2038083449","https://openalex.org/W3177678247","https://openalex.org/W1986772939","https://openalex.org/W2037635165","https://openalex.org/W2738829087","https://openalex.org/W2542062716","https://openalex.org/W1505346162","https://openalex.org/W4200562864"],"abstract_inverted_index":{"Speech":[0],"enhancement":[1,118,174],"model":[2,27,40,102,132,219],"is":[3,21,35,62,84,96,196,235],"used":[4,86],"to":[5,10,24,92,126,160,176,203,240],"map":[6],"a":[7,11,65,188],"noisy":[8],"speech":[9,55,83,117,148,190,201,228,234],"clean":[12,82],"speech.":[13,50],"In":[14,109],"the":[15,26,31,39,44,48,59,72,89,93,100,128,131,135,140,153,171,178,185,193,204,207,210,216,221,232,245],"training":[16,208],"stage,":[17],"an":[18,36,114,167],"objective":[19,67,164],"function":[20],"often":[22],"adopted":[23],"optimize":[25,162,177],"parameters.":[28],"However,":[29],"in":[30,53,87,107],"existing":[32],"literature,":[33],"there":[34,95],"inconsistency":[37],"between":[38,79,130,206],"optimization":[41,133],"criterion":[42,46],"and":[43,81,134,209,226],"evaluation":[45,60,136,211],"for":[47],"enhanced":[49,233],"For":[51],"example,":[52,168],"measuring":[54],"intelligibility,":[56],"most":[57],"of":[58,139,146,187,223],"metric":[61],"based":[63,74,243],"on":[64,231,244],"short-time":[66],"intelligibility":[68,222],"(STOI)":[69],"measure,":[70],"while":[71],"frame":[73],"mean":[75],"square":[76],"error":[77],"(MSE)":[78],"estimated":[80],"widely":[85],"optimizing":[88],"model.":[90],"Due":[91],"inconsistency,":[94],"no":[97],"guarantee":[98],"that":[99,184],"trained":[101],"can":[103,157],"provide":[104],"optimal":[105],"performance":[106],"applications.":[108],"this":[110],"study,":[111],"we":[112,169],"propose":[113],"end-to-end":[115],"utterance-based":[116,141],"framework":[119,175],"using":[120],"fully":[121],"convolutional":[122],"neural":[123],"networks":[124],"(FCN)":[125],"reduce":[127],"gap":[129],"criterion.":[137,248],"Because":[138],"optimization,":[142,220],"temporal":[143],"correlation":[144],"information":[145],"long":[147],"segments,":[149],"or":[150],"even":[151],"at":[152],"entire":[154],"utterance":[155],"level,":[156],"be":[158],"considered":[159],"directly":[161],"perception-based":[163],"functions.":[165],"As":[166],"implemented":[170],"proposed":[172,194],"FCN":[173],"STOI":[179,186,217],"measure.":[180],"Experimental":[181],"results":[182],"show":[183],"test":[189],"processed":[191],"by":[192,214],"approach":[195],"better":[197],"than":[198],"conventional":[199],"MSE-optimized":[200],"due":[202],"consistency":[205],"targets.":[212],"Moreover,":[213],"integrating":[215],"into":[218],"human":[224],"subjects":[225],"automatic":[227],"recognition":[229],"system":[230],"also":[236],"substantially":[237],"improved":[238],"compared":[239],"those":[241],"generated":[242],"minimum":[246],"MSE":[247]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":34},{"year":2023,"cited_by_count":37},{"year":2022,"cited_by_count":45},{"year":2021,"cited_by_count":62},{"year":2020,"cited_by_count":65},{"year":2019,"cited_by_count":48},{"year":2018,"cited_by_count":20},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
