{"id":"https://openalex.org/W2962892438","doi":"https://doi.org/10.21437/interspeech.2018-1262","title":"Building State-of-the-art Distant Speech Recognition Using the CHiME-4 Challenge with a Setup of Speech Enhancement Baseline","display_name":"Building State-of-the-art Distant Speech Recognition Using the CHiME-4 Challenge with a Setup of Speech Enhancement Baseline","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https://openalex.org/W2962892438","doi":"https://doi.org/10.21437/interspeech.2018-1262","mag":"2962892438"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2018-1262","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1262","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065795147","display_name":"Szu-Jui Chen","orcid":"https://orcid.org/0000-0002-6406-9280"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Szu-Jui Chen","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048730867","display_name":"Aswin Shanmugam Subramanian","orcid":"https://orcid.org/0000-0003-4446-001X"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aswin Shanmugam Subramanian","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034399968","display_name":"Hainan Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hainan Xu","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA"],"affiliations":[{"raw_affiliation_string":"Center for Language and Speech Processing, Johns Hopkins University, Baltimore, MD 21218, USA","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5001291873"],"corresponding_institution_ids":["https://openalex.org/I145311948"],"apc_list":null,"apc_paid":null,"fwci":6.1407223,"has_fulltext":false,"cited_by_count":74,"citation_normalized_percentile":{"value":0.96885148,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1571","last_page":"1575"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9704999923706055,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.8245724439620972},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7054581642150879},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.681397020816803},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.455477237701416},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.05807691812515259},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.05427101254463196}],"concepts":[{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.8245724439620972},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7054581642150879},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.681397020816803},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.455477237701416},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.05807691812515259},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.05427101254463196},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2018-1262","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2018-1262","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W1524333225","https://openalex.org/W1552314771","https://openalex.org/W2117671523","https://openalex.org/W2127851351","https://openalex.org/W2131342762","https://openalex.org/W2141998673","https://openalex.org/W2148613904","https://openalex.org/W2158143227","https://openalex.org/W2288645994","https://openalex.org/W2289394825","https://openalex.org/W2398042854","https://openalex.org/W2400997536","https://openalex.org/W2402146185","https://openalex.org/W2402268235","https://openalex.org/W2514741789","https://openalex.org/W2516001803","https://openalex.org/W2517616541","https://openalex.org/W2559260703","https://openalex.org/W2592384597","https://openalex.org/W2622203030","https://openalex.org/W2749475572","https://openalex.org/W2802422770","https://openalex.org/W2884797218","https://openalex.org/W2963399332","https://openalex.org/W3112742522"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2005801347","https://openalex.org/W2372385138","https://openalex.org/W2186640231","https://openalex.org/W4296359239"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"a":[3,33,80,111],"new":[4],"baseline":[5,146],"system":[6,31,36,63,121],"for":[7,115,125,173,185],"automatic":[8],"speech":[9,24,59,151,165,169,186],"recognition":[10,60],"(ASR)":[11],"in":[12,23,43,56,130,140],"the":[13,18,39,44,53,57,88,92,104,126,131,137,141,144,174,178],"CHiME-4":[14],"challenge":[15],"to":[16,38,78,136],"promote":[17],"development":[19],"of":[20,91,164],"noisy":[21],"ASR":[22],"processing":[25],"communities":[26],"by":[27],"providing":[28],"1)":[29],"state-of-the-art":[30],"with":[32,68,98,189],"simplified":[34],"single":[35],"comparable":[37],"complicated":[40],"top":[41],"systems":[42],"challenge,":[45],"2)":[46],"publicly":[47],"available":[48],"and":[49,117,168],"reproducible":[50],"recipe":[51,147,179],"through":[52],"main":[54],"repository":[55],"Kaldi":[58],"toolkit.The":[61],"proposed":[62,145],"adopts":[64],"generalized":[65],"eigenvalue":[66],"beamforming":[67],"bidirectional":[69],"long":[70],"short-term":[71],"memory":[72],"(LSTM)":[73],"mask":[74],"estimation.We":[75],"also":[76,180],"propose":[77],"use":[79,110],"time":[81],"delay":[82],"neural":[83],"network":[84],"(TDNN)":[85],"based":[86],"on":[87],"lattice-free":[89],"version":[90],"maximum":[93],"mutual":[94],"information":[95],"(LF-MMI)":[96],"trained":[97],"augmented":[99],"all":[100],"six":[101],"microphones":[102],"plus":[103],"enhanced":[105],"data":[106],"after":[107],"beamforming.Finally,":[108],"we":[109],"LSTM":[112],"language":[113],"model":[114],"lattice":[116],"n-best":[118],"re-scoring.The":[119],"final":[120],"achieved":[122],"2.74%":[123],"WER":[124],"real":[127],"test":[128,176],"set":[129],"6-channel":[132],"track,":[133],"which":[134],"corresponds":[135],"2nd":[138],"place":[139],"challenge.In":[142],"addition,":[143],"includes":[148],"four":[149],"different":[150],"enhancement":[152,187],"measures,":[153],"short-time":[154],"objective":[155],"intelligibility":[156],"measure":[157],"(STOI),":[158],"extended":[159],"STOI":[160],"(eSTOI),":[161],"perceptual":[162],"evaluation":[163],"quality":[166],"(PESQ)":[167],"distortion":[170],"ratio":[171],"(SDR)":[172],"simulation":[175],"set.Thus,":[177],"provides":[181],"an":[182],"experimental":[183],"platform":[184],"studies":[188],"these":[190],"performance":[191],"measures.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":12},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":1}],"updated_date":"2026-02-20T08:17:22.645390","created_date":"2025-10-10T00:00:00"}
