{"id":"https://openalex.org/W2546447191","doi":"https://doi.org/10.1109/iscslp.2016.7918400","title":"Deep neural network for robust speech recognition with auxiliary features from laser-Doppler vibrometer sensor","display_name":"Deep neural network for robust speech recognition with auxiliary features from laser-Doppler vibrometer sensor","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W2546447191","doi":"https://doi.org/10.1109/iscslp.2016.7918400","mag":"2546447191"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp.2016.7918400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2016.7918400","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112098283","display_name":"Zhipeng Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhipeng Xie","raw_affiliation_strings":["NELSLIP, The University of Science and Technology of China, Hefei, PRC"],"affiliations":[{"raw_affiliation_string":"NELSLIP, The University of Science and Technology of China, Hefei, PRC","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066595711","display_name":"Jun Du","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Du","raw_affiliation_strings":["NELSLIP, The University of Science and Technology of China, Hefei, PRC"],"affiliations":[{"raw_affiliation_string":"NELSLIP, The University of Science and Technology of China, Hefei, PRC","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000620878","display_name":"Ian McLoughlin","orcid":"https://orcid.org/0000-0001-7111-2008"},"institutions":[{"id":"https://openalex.org/I167056439","display_name":"Medway School of Pharmacy","ror":"https://ror.org/00fa9v295","country_code":"GB","type":"education","lineage":["https://openalex.org/I167056439"]},{"id":"https://openalex.org/I20581793","display_name":"University of Kent","ror":"https://ror.org/00xkeyj56","country_code":"GB","type":"education","lineage":["https://openalex.org/I20581793"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ian McLoughlin","raw_affiliation_strings":["School of Computing, University of Kent, Medway, UK"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Kent, Medway, UK","institution_ids":["https://openalex.org/I167056439","https://openalex.org/I20581793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102746357","display_name":"Yong Xu","orcid":"https://orcid.org/0000-0001-9934-3369"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yong Xu","raw_affiliation_strings":["iFlytek Research"],"affiliations":[{"raw_affiliation_string":"iFlytek Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061764077","display_name":"Feng Ma","orcid":"https://orcid.org/0000-0001-7067-5706"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng Ma","raw_affiliation_strings":["iFlytek Research"],"affiliations":[{"raw_affiliation_string":"iFlytek Research","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045610213","display_name":"Haikun Wang","orcid":"https://orcid.org/0000-0002-2565-5543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haikun Wang","raw_affiliation_strings":["iFlytek Research"],"affiliations":[{"raw_affiliation_string":"iFlytek Research","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5112098283"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.4443,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.80079025,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/laser-doppler-vibrometer","display_name":"Laser Doppler vibrometer","score":0.8530873656272888},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5866590738296509},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.576181948184967},{"id":"https://openalex.org/keywords/laser-scanning-vibrometry","display_name":"Laser scanning vibrometry","score":0.5761656761169434},{"id":"https://openalex.org/keywords/doppler-effect","display_name":"Doppler effect","score":0.510515570640564},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4691043794155121},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.4368906617164612},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3487601578235626},{"id":"https://openalex.org/keywords/laser","display_name":"Laser","score":0.2507396936416626},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13860413432121277},{"id":"https://openalex.org/keywords/laser-beams","display_name":"Laser beams","score":0.08954712748527527},{"id":"https://openalex.org/keywords/optics","display_name":"Optics","score":0.07799530029296875}],"concepts":[{"id":"https://openalex.org/C96199931","wikidata":"https://www.wikidata.org/wiki/Q1724132","display_name":"Laser Doppler vibrometer","level":4,"score":0.8530873656272888},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5866590738296509},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.576181948184967},{"id":"https://openalex.org/C97034920","wikidata":"https://www.wikidata.org/wiki/Q973913","display_name":"Laser scanning vibrometry","level":5,"score":0.5761656761169434},{"id":"https://openalex.org/C142757262","wikidata":"https://www.wikidata.org/wiki/Q76436","display_name":"Doppler effect","level":2,"score":0.510515570640564},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4691043794155121},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.4368906617164612},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3487601578235626},{"id":"https://openalex.org/C520434653","wikidata":"https://www.wikidata.org/wiki/Q38867","display_name":"Laser","level":2,"score":0.2507396936416626},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13860413432121277},{"id":"https://openalex.org/C2984025587","wikidata":"https://www.wikidata.org/wiki/Q38867","display_name":"Laser beams","level":3,"score":0.08954712748527527},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.07799530029296875},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/iscslp.2016.7918400","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp.2016.7918400","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},{"id":"pmh:oai:kar.kent.ac.uk:57111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ISCSLP.2016.7918400>)","pdf_url":null,"source":{"id":"https://openalex.org/S4377196264","display_name":"Kent Academic Repository (University of Kent)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I20581793","host_organization_name":"University of Kent","host_organization_lineage":["https://openalex.org/I20581793"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5099999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1598508708","https://openalex.org/W1971845863","https://openalex.org/W1992272902","https://openalex.org/W1992475611","https://openalex.org/W2044821519","https://openalex.org/W2059644683","https://openalex.org/W2062164080","https://openalex.org/W2080921589","https://openalex.org/W2112933903","https://openalex.org/W2126261833","https://openalex.org/W2128127640","https://openalex.org/W2147768505","https://openalex.org/W2157590573","https://openalex.org/W2162449135","https://openalex.org/W2170982194","https://openalex.org/W2213952365","https://openalex.org/W2291652038","https://openalex.org/W2399557756","https://openalex.org/W2408713104","https://openalex.org/W6678604152","https://openalex.org/W6678978470","https://openalex.org/W6683815549","https://openalex.org/W6712706723","https://openalex.org/W6713729801"],"related_works":["https://openalex.org/W3204309621","https://openalex.org/W3017341204","https://openalex.org/W1999246021","https://openalex.org/W1668045430","https://openalex.org/W3008679284","https://openalex.org/W2368432289","https://openalex.org/W1984298144","https://openalex.org/W2046805728","https://openalex.org/W2070131892","https://openalex.org/W2128816944"],"abstract_inverted_index":{"Recently,":[0],"the":[1,15,25,43,59,68,76,91,152,155,164],"signal":[2,27,45,125],"captured":[3],"from":[4,42,120,134,154],"a":[5,62,102,121,135],"laser":[6],"Doppler":[7],"vibrometer":[8],"(LDV)":[9],"sensor":[10],"been":[11],"used":[12,47,129],"to":[13,29,52,95,107,111,130,171],"improve":[14,54],"noise":[16],"robustness":[17],"automatic":[18],"speech":[19,100,137],"recognition":[20],"(ASR)":[21],"systems":[22],"by":[23],"enhancing":[24],"acoustic":[26,50,69,92,109,181],"prior":[28],"feature":[30],"extraction.":[31],"This":[32,114],"study":[33],"proposes":[34],"another":[35],"approach":[36,73],"in":[37,88,184],"which":[38],"auxiliary":[39],"features":[40,51,110,133,153,168,182],"extracted":[41],"LDV":[44,86,112,158],"are":[46,169],"alongside":[48],"conventional":[49],"further":[53],"ASR":[55,82,145,174],"performance":[56],"based":[57],"on":[58],"use":[60],"of":[61,143],"deep":[63],"neural":[64],"network":[65],"(DNN)":[66],"as":[67,161,163],"model.":[70],"While":[71],"this":[72],"is":[74,105,117],"promising,":[75],"best":[77],"training":[78,142],"data":[79,87,126,138,159],"sets":[80],"for":[81,140],"do":[83],"not":[84],"include":[85],"parallel":[89,124,141],"with":[90],"signal.":[93],"Thus,":[94],"leverage":[96],"such":[97],"existing":[98],"large-scale":[99],"databases,":[101],"regression":[103,115],"DNN":[104,116],"designed":[106],"map":[108],"features.":[113],"well":[118,162],"trained":[119],"limited":[122,156],"size":[123],"set,":[127],"then":[128],"form":[131],"pseudo-LDV":[132,167],"massive":[136,165],"set":[139,160],"an":[144,173],"system.":[146],"Our":[147],"experiments":[148],"show":[149],"that":[150,176],"both":[151,185],"scale":[157,166],"able":[170],"train":[172],"system":[175],"significantly":[177],"outperforms":[178],"one":[179],"using":[180],"alone,":[183],"quiet":[186],"and":[187],"noisy":[188],"environments.":[189]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
