{"id":"https://openalex.org/W2052117542","doi":"https://doi.org/10.1109/apsipa.2014.7041548","title":"Denoising autoencoder and environment adaptation for distant-talking speech recognition with asynchronous speech recording","display_name":"Denoising autoencoder and environment adaptation for distant-talking speech recognition with asynchronous speech recording","publication_year":2014,"publication_date":"2014-12-01","ids":{"openalex":"https://openalex.org/W2052117542","doi":"https://doi.org/10.1109/apsipa.2014.7041548","mag":"2052117542"},"language":"en","primary_location":{"id":"doi:10.1109/apsipa.2014.7041548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipa.2014.7041548","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Signal and Information Processing Association Annual Summit and Conference (APSIPA), 2014 Asia-Pacific","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050763764","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-4005-5036"},"institutions":[{"id":"https://openalex.org/I85922643","display_name":"Nagaoka University of Technology","ror":"https://ror.org/00ys1hz88","country_code":"JP","type":"education","lineage":["https://openalex.org/I85922643"]},{"id":"https://openalex.org/I119806805","display_name":"Nagaoka University","ror":"https://ror.org/02rcadd38","country_code":"JP","type":"education","lineage":["https://openalex.org/I119806805"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["Nagaoka University of Technology, Nagaoka, Japan","Nagaoka University of Technology, Nagaoka, 940-2188, Japan#TAB#"],"affiliations":[{"raw_affiliation_string":"Nagaoka University of Technology, Nagaoka, Japan","institution_ids":["https://openalex.org/I119806805","https://openalex.org/I85922643"]},{"raw_affiliation_string":"Nagaoka University of Technology, Nagaoka, 940-2188, Japan#TAB#","institution_ids":["https://openalex.org/I85922643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041413561","display_name":"Bo Ren","orcid":"https://orcid.org/0000-0002-0481-5069"},"institutions":[{"id":"https://openalex.org/I119806805","display_name":"Nagaoka University","ror":"https://ror.org/02rcadd38","country_code":"JP","type":"education","lineage":["https://openalex.org/I119806805"]},{"id":"https://openalex.org/I85922643","display_name":"Nagaoka University of Technology","ror":"https://ror.org/00ys1hz88","country_code":"JP","type":"education","lineage":["https://openalex.org/I85922643"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Bo Ren","raw_affiliation_strings":["Nagaoka University of Technology, Nagaoka, Japan","Nagaoka University of Technology, Nagaoka, 940-2188, Japan#TAB#"],"affiliations":[{"raw_affiliation_string":"Nagaoka University of Technology, Nagaoka, Japan","institution_ids":["https://openalex.org/I119806805","https://openalex.org/I85922643"]},{"raw_affiliation_string":"Nagaoka University of Technology, Nagaoka, 940-2188, Japan#TAB#","institution_ids":["https://openalex.org/I85922643"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100901893","display_name":"Yuma Ueda","orcid":null},"institutions":[{"id":"https://openalex.org/I1298590031","display_name":"Shizuoka University","ror":"https://ror.org/01w6wtk13","country_code":"JP","type":"education","lineage":["https://openalex.org/I1298590031"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuma Ueda","raw_affiliation_strings":["Shizuoka University, Hamamatsu, Japan","Shizuoka University, Hamamatsu, 432-8561 Japan"],"affiliations":[{"raw_affiliation_string":"Shizuoka University, Hamamatsu, Japan","institution_ids":["https://openalex.org/I1298590031"]},{"raw_affiliation_string":"Shizuoka University, Hamamatsu, 432-8561 Japan","institution_ids":["https://openalex.org/I1298590031"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071123569","display_name":"Atsuhiko Kai","orcid":null},"institutions":[{"id":"https://openalex.org/I1298590031","display_name":"Shizuoka University","ror":"https://ror.org/01w6wtk13","country_code":"JP","type":"education","lineage":["https://openalex.org/I1298590031"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Atsuhiko Kai","raw_affiliation_strings":["Shizuoka University, Hamamatsu, Japan","Shizuoka University, Hamamatsu, 432-8561 Japan"],"affiliations":[{"raw_affiliation_string":"Shizuoka University, Hamamatsu, Japan","institution_ids":["https://openalex.org/I1298590031"]},{"raw_affiliation_string":"Shizuoka University, Hamamatsu, 432-8561 Japan","institution_ids":["https://openalex.org/I1298590031"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073867523","display_name":"Teraoka Shunta","orcid":null},"institutions":[{"id":"https://openalex.org/I1298590031","display_name":"Shizuoka University","ror":"https://ror.org/01w6wtk13","country_code":"JP","type":"education","lineage":["https://openalex.org/I1298590031"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Shunta Teraoka","raw_affiliation_strings":["Shizuoka University, Hamamatsu, Japan","Shizuoka University, Hamamatsu, 432-8561 Japan"],"affiliations":[{"raw_affiliation_string":"Shizuoka University, Hamamatsu, Japan","institution_ids":["https://openalex.org/I1298590031"]},{"raw_affiliation_string":"Shizuoka University, Hamamatsu, 432-8561 Japan","institution_ids":["https://openalex.org/I1298590031"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077621280","display_name":"Taku Fukushima","orcid":null},"institutions":[{"id":"https://openalex.org/I1298590031","display_name":"Shizuoka University","ror":"https://ror.org/01w6wtk13","country_code":"JP","type":"education","lineage":["https://openalex.org/I1298590031"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Taku Fukushima","raw_affiliation_strings":["Shizuoka University, Hamamatsu, Japan","Shizuoka University, Hamamatsu, 432-8561 Japan"],"affiliations":[{"raw_affiliation_string":"Shizuoka University, Hamamatsu, Japan","institution_ids":["https://openalex.org/I1298590031"]},{"raw_affiliation_string":"Shizuoka University, Hamamatsu, 432-8561 Japan","institution_ids":["https://openalex.org/I1298590031"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5050763764"],"corresponding_institution_ids":["https://openalex.org/I119806805","https://openalex.org/I85922643"],"apc_list":null,"apc_paid":null,"fwci":2.1014,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.89324638,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"14","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.8361754417419434},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8255007266998291},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.6802730560302734},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.6107457280158997},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.6080330014228821},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.5099684000015259},{"id":"https://openalex.org/keywords/cepstrum","display_name":"Cepstrum","score":0.4796103239059448},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4458417296409607},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.432343065738678},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4201200008392334},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.23454517126083374},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.09038135409355164},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08411714434623718}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.8361754417419434},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8255007266998291},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.6802730560302734},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.6107457280158997},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.6080330014228821},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.5099684000015259},{"id":"https://openalex.org/C88485024","wikidata":"https://www.wikidata.org/wiki/Q1054571","display_name":"Cepstrum","level":2,"score":0.4796103239059448},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4458417296409607},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.432343065738678},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4201200008392334},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.23454517126083374},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.09038135409355164},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08411714434623718},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipa.2014.7041548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipa.2014.7041548","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Signal and Information Processing Association Annual Summit and Conference (APSIPA), 2014 Asia-Pacific","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.4699999988079071,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320325763","display_name":"Telecommunications Advancement Foundation","ror":"https://ror.org/05y77zf79"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W236686179","https://openalex.org/W1973669708","https://openalex.org/W1982846521","https://openalex.org/W1989163463","https://openalex.org/W1989314204","https://openalex.org/W1991234575","https://openalex.org/W1992713442","https://openalex.org/W1998229164","https://openalex.org/W2005522781","https://openalex.org/W2037740282","https://openalex.org/W2047769394","https://openalex.org/W2053150333","https://openalex.org/W2069976350","https://openalex.org/W2100495367","https://openalex.org/W2103564015","https://openalex.org/W2107992675","https://openalex.org/W2110322414","https://openalex.org/W2112748425","https://openalex.org/W2114782757","https://openalex.org/W2128653836","https://openalex.org/W2130640611","https://openalex.org/W2130722890","https://openalex.org/W2132214945","https://openalex.org/W2142117481","https://openalex.org/W2145094598","https://openalex.org/W2151435578","https://openalex.org/W2160815625","https://openalex.org/W2166772024","https://openalex.org/W2296581541","https://openalex.org/W2402919203","https://openalex.org/W2405774341","https://openalex.org/W2997574889","https://openalex.org/W6648509342","https://openalex.org/W6676279616","https://openalex.org/W6681096077","https://openalex.org/W6681171195","https://openalex.org/W6697251715","https://openalex.org/W6713521420","https://openalex.org/W6713658392"],"related_works":["https://openalex.org/W2159052453","https://openalex.org/W3013693939","https://openalex.org/W2566616303","https://openalex.org/W3131327266","https://openalex.org/W2752972570","https://openalex.org/W4297051394","https://openalex.org/W1978230699","https://openalex.org/W2069431997","https://openalex.org/W2352700476","https://openalex.org/W2022349254"],"abstract_inverted_index":{"In":[0],"this":[1,63],"paper,":[2,64],"we":[3,87],"propose":[4],"a":[5,67,110,118,123,136],"robust":[6],"distant-talking":[7,51],"speech":[8,13,26,52,75,98],"recognition":[9,53],"system":[10,60,161],"with":[11,54,126,145],"asynchronous":[12,25,55,90],"recording.":[14],"This":[15],"is":[16],"implemented":[17],"by":[18,117,129],"combining":[19],"denoising":[20,68,138],"autoencoder-based":[21],"cepstral-domain":[22,137],"dereverberation,":[23],"automatic":[24,89,141],"(microphone":[27],"or":[28],"mobile":[29,38,56,91,102,132,142],"terminal)":[30],"selection":[31,93,144],"and":[32,79,94,120,140],"environment":[33,95,146],"adaptation.":[34],"Although":[35],"applications":[36],"using":[37,97,109,173],"terminals":[39],"have":[40],"attracted":[41],"increasing":[42],"attention,":[43],"there":[44],"are":[45],"few":[46],"studies":[47],"that":[48],"focus":[49],"on":[50],"terminals.":[57,103,133],"For":[58],"the":[59,71,148,159,165],"proposed":[61,105],"in":[62,70,122],"after":[65],"applying":[66],"autoencoder":[69,139],"cepstral":[72],"domain":[73],"of":[74,158],"to":[76,162],"suppress":[77],"reverberation":[78],"performing":[80],"Large":[81],"Vocabulary":[82],"Continuous":[83],"Speech":[84],"Recognition":[85],"(LVCSR),":[86],"adopted":[88],"terminal":[92,143],"adaptation":[96],"segments":[99],"from":[100,156],"optimal":[101],"The":[104],"method":[106],"was":[107,115,154,170],"evaluated":[108],"reverberant":[111],"WSJCAMO":[112],"corpus,":[113],"which":[114],"emitted":[116],"loudspeaker":[119],"recorded":[121],"meeting":[124],"room":[125],"multiple":[127,131],"speakers":[128],"far-field":[130],"By":[134],"integrating":[135],"adaptation,":[147],"average":[149],"Word":[150],"Error":[151],"Rate":[152],"(WER)":[153],"reduced":[155],"51.8%":[157],"baseline":[160],"28.8%,":[163],"i.e.,":[164],"relative":[166],"error":[167],"reduction":[168],"rate":[169],"44.4%":[171],"when":[172],"multi-condition":[174],"acoustic":[175],"models.":[176]},"counts_by_year":[{"year":2017,"cited_by_count":3},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
