{"id":"https://openalex.org/W2972481755","doi":"https://doi.org/10.21437/interspeech.2019-2792","title":"Coarse-to-Fine Optimization for Speech Enhancement","display_name":"Coarse-to-Fine Optimization for Speech Enhancement","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972481755","doi":"https://doi.org/10.21437/interspeech.2019-2792","mag":"2972481755"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-2792","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2792","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011274467","display_name":"Jian Yao","orcid":"https://orcid.org/0000-0002-9134-5084"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jian Yao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5013056064","display_name":"Ahmad Al-Dahle","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmad Al-Dahle","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5011274467"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.8196,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.9158586,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2743","last_page":"2747"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9337000250816345,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10662","display_name":"Ultrasonics and Acoustic Wave Propagation","score":0.9150999784469604,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7258086204528809},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6155228614807129},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.607358455657959},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6044244766235352},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5604180693626404},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.5431503653526306},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5346860885620117},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5033430457115173},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47419267892837524},{"id":"https://openalex.org/keywords/generative-adversarial-network","display_name":"Generative adversarial network","score":0.4149368405342102},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38929256796836853},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3631416857242584},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15399715304374695},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.06294676661491394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7258086204528809},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6155228614807129},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.607358455657959},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6044244766235352},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5604180693626404},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.5431503653526306},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5346860885620117},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5033430457115173},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47419267892837524},{"id":"https://openalex.org/C2988773926","wikidata":"https://www.wikidata.org/wiki/Q25104379","display_name":"Generative adversarial network","level":3,"score":0.4149368405342102},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38929256796836853},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3631416857242584},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15399715304374695},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.06294676661491394},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-2792","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-2792","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W4205463238","https://openalex.org/W2761785940","https://openalex.org/W1482209366","https://openalex.org/W2110523656","https://openalex.org/W2619127353","https://openalex.org/W2904948340"],"abstract_inverted_index":{"In":[0,98],"this":[1,58,99],"paper,":[2],"we":[3,128],"propose":[4,143],"the":[5,9,34,38,43,75,89,101,107,113,130,134,150,164],"coarse-to-fine":[6,72,118,131],"optimization":[7,119],"for":[8,79],"task":[10],"of":[11,28,37,115],"speech":[12,29,40,67,103],"enhancement.":[13],"Cosine":[14],"similarity":[15,27,46,77],"loss":[16,47,59,78,136,152,161],"[1]":[17],"has":[18],"proven":[19],"to":[20,25,33,64,88,94,133,156],"be":[21,62],"an":[22],"effective":[23],"metric":[24],"measure":[26],"signals.":[30],"However,":[31],"due":[32],"large":[35],"variance":[36],"enhanced":[39,66,102],"with":[41,57,68,171],"even":[42],"same":[44],"cosine":[45,76],"in":[48,120,137],"high":[49,92],"dimensional":[50],"space,":[51],"a":[52],"deep":[53],"neural":[54],"network":[55,140],"learnt":[56],"might":[60],"not":[61],"able":[63],"predict":[65],"good":[69],"quality.":[70],"Our":[71],"strategy":[73,132],"optimizes":[74],"different":[80],"granularities":[81],"so":[82],"that":[83],"more":[84],"constraints":[85],"are":[86],"added":[87],"prediction":[90],"from":[91,153],"dimension":[93],"relatively":[95],"low":[96],"dimension.":[97],"way,":[100],"will":[104],"better":[105],"resemble":[106],"clean":[108],"speech.":[109],"Experimental":[110],"results":[111,169],"show":[112],"effectiveness":[114],"our":[116],"proposed":[117],"both":[121],"discriminative":[122],"models":[123],"and":[124,142,166],"generative":[125,138,173],"models.":[126,174],"Moreover,":[127],"apply":[129],"adversarial":[135,139,151],"(GAN)":[141],"dynamic":[144],"perceptual":[145,160],"loss,":[146],"which":[147],"dynamically":[148],"computes":[149],"coarse":[154],"resolution":[155],"fine":[157],"resolution.":[158],"Dynamic":[159],"further":[162],"improves":[163],"accuracy":[165],"achieves":[167],"state-of-the-art":[168],"compared":[170],"other":[172]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
