{"id":"https://openalex.org/W3116424918","doi":"https://doi.org/10.1109/ictc49870.2020.9289445","title":"Multi-Scale Multi-Band Dilated DenseLSTM for Robust Recognition of Speech with Background Music","display_name":"Multi-Scale Multi-Band Dilated DenseLSTM for Robust Recognition of Speech with Background Music","publication_year":2020,"publication_date":"2020-10-21","ids":{"openalex":"https://openalex.org/W3116424918","doi":"https://doi.org/10.1109/ictc49870.2020.9289445","mag":"3116424918"},"language":"en","primary_location":{"id":"doi:10.1109/ictc49870.2020.9289445","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc49870.2020.9289445","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012083888","display_name":"Woon-Haeng Heo","orcid":null},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Woon-Haeng Heo","raw_affiliation_strings":["School of Electronics Engineering, Chungbuk National University, Cheongju, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electronics Engineering, Chungbuk National University, Cheongju, South Korea","institution_ids":["https://openalex.org/I163753206"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076443181","display_name":"Hyemi Kim","orcid":"https://orcid.org/0000-0003-4713-4658"},"institutions":[{"id":"https://openalex.org/I142401562","display_name":"Electronics and Telecommunications Research Institute","ror":"https://ror.org/03ysstz10","country_code":"KR","type":"facility","lineage":["https://openalex.org/I142401562","https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyemi Kim","raw_affiliation_strings":["Creative Content Research Division, Electronics and Telecommunications Research Institute, Daejeon, South Korea"],"affiliations":[{"raw_affiliation_string":"Creative Content Research Division, Electronics and Telecommunications Research Institute, Daejeon, South Korea","institution_ids":["https://openalex.org/I142401562"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000031464","display_name":"Oh\u2010Wook Kwon","orcid":"https://orcid.org/0000-0002-5301-126X"},"institutions":[{"id":"https://openalex.org/I163753206","display_name":"Chungbuk National University","ror":"https://ror.org/02wnxgj78","country_code":"KR","type":"education","lineage":["https://openalex.org/I163753206"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Oh-Wook Kwon","raw_affiliation_strings":["School of Electronics Engineering, Chungbuk National University, Cheongju, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electronics Engineering, Chungbuk National University, Cheongju, South Korea","institution_ids":["https://openalex.org/I163753206"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5012083888"],"corresponding_institution_ids":["https://openalex.org/I163753206"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16319947,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1238","last_page":"1241"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8208152055740356},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6839886903762817},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6677196621894836},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5761184096336365},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.5729864835739136},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5282914638519287},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.488059401512146},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.444341778755188},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4050646424293518},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10055449604988098}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8208152055740356},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6839886903762817},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6677196621894836},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5761184096336365},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.5729864835739136},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5282914638519287},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.488059401512146},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.444341778755188},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4050646424293518},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10055449604988098},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ictc49870.2020.9289445","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc49870.2020.9289445","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1524333225","https://openalex.org/W1552314771","https://openalex.org/W1836465849","https://openalex.org/W1974387177","https://openalex.org/W1975678038","https://openalex.org/W2052666245","https://openalex.org/W2077799289","https://openalex.org/W2078528584","https://openalex.org/W2116360511","https://openalex.org/W2127851351","https://openalex.org/W2141998673","https://openalex.org/W2156387975","https://openalex.org/W2397226255","https://openalex.org/W2626544737","https://openalex.org/W2669032454","https://openalex.org/W2678916739","https://openalex.org/W2774707525","https://openalex.org/W2897371647","https://openalex.org/W2945191446","https://openalex.org/W2963446712","https://openalex.org/W2963698842","https://openalex.org/W2963750251","https://openalex.org/W2963751183","https://openalex.org/W2963840672","https://openalex.org/W3010461307","https://openalex.org/W3016447038","https://openalex.org/W4298310324","https://openalex.org/W6631362777","https://openalex.org/W6638667902","https://openalex.org/W6644251649","https://openalex.org/W6682889407","https://openalex.org/W6696085341","https://openalex.org/W6712317276","https://openalex.org/W6746914816","https://openalex.org/W6751356808","https://openalex.org/W6766320909"],"related_works":["https://openalex.org/W2560215812","https://openalex.org/W4226493464","https://openalex.org/W4312417841","https://openalex.org/W3133861977","https://openalex.org/W2951211570","https://openalex.org/W3103566983","https://openalex.org/W3029198973","https://openalex.org/W3096184950","https://openalex.org/W4231424160","https://openalex.org/W2275432853"],"abstract_inverted_index":{"We":[0,110],"propose":[1],"a":[2,48,61,91,106,112],"multi-scale":[3],"multi-band":[4],"dilated":[5,49,58,84],"time-":[6],"frequency":[7],"DenseNet":[8],"with":[9],"LSTM":[10],"for":[11,78],"speech":[12,15,79,113,116],"enhancement":[13,114],"and":[14,115,123],"recognition.":[16],"In":[17,42,75],"the":[18,30,39,72,83,120,136,140],"convolutional":[19,64,92],"neural":[20,94],"network":[21,65,95,131],"(CNN)-":[22],"based":[23,89],"architecture,":[24],"it":[25],"is":[26],"important":[27],"to":[28,36,60,69,87,144],"increase":[29,71],"receptive":[31,73],"field":[32],"effectively":[33,70],"in":[34,67,102],"order":[35,68],"sufficiently":[37],"consider":[38],"context":[40],"information.":[41],"our":[43],"previous":[44],"study,":[45,77],"we":[46,81],"designed":[47],"dense":[50,85],"block":[51],"that":[52],"reflects":[53],"acoustic":[54],"characteristics":[55],"by":[56],"applying":[57],"convolutions":[59],"densely":[62],"connected":[63],"(DenseNet)":[66],"field.":[74],"this":[76],"enhancement,":[80],"apply":[82],"blocks":[86],"MMDenseLSTM":[88],"on":[90],"recurrent":[93],"(CRNN)":[96],"which":[97],"has":[98],"shown":[99],"good":[100],"performance":[101,142],"recent":[103],"studies":[104],"using":[105,119],"deep":[107,126,146],"learning":[108,127,147],"architecture.":[109],"conduct":[111],"recognition":[117],"experiment":[118],"proposed":[121,137],"architecture":[122,138],"several":[124],"existing":[125],"architectures:":[128],"Gated":[129],"residual":[130],"(GRN),":[132],"MMDenseLSTM,":[133],"DilDenseNet.":[134],"Overall,":[135],"shows":[139],"best":[141],"compared":[143],"other":[145],"architectures.":[148]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
