{"id":"https://openalex.org/W2799566650","doi":"https://doi.org/10.1109/icassp.2018.8462690","title":"A Generative Auditory Model Embedded Neural Network for Speech Processing","display_name":"A Generative Auditory Model Embedded Neural Network for Speech Processing","publication_year":2018,"publication_date":"2018-04-01","ids":{"openalex":"https://openalex.org/W2799566650","doi":"https://doi.org/10.1109/icassp.2018.8462690","mag":"2799566650"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2018.8462690","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462690","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006670099","display_name":"Yu-Wen Lo","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu-Wen Lo","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National Chiao Tung University, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063025053","display_name":"Yih-Liang Shen","orcid":"https://orcid.org/0000-0003-4789-6695"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yih-Liang Shen","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National Chiao Tung University, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082271172","display_name":"Yuan\u2010Fu Liao","orcid":"https://orcid.org/0000-0003-0191-2178"},"institutions":[{"id":"https://openalex.org/I118292597","display_name":"National Taipei University of Technology","ror":"https://ror.org/00cn92c09","country_code":"TW","type":"education","lineage":["https://openalex.org/I118292597"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yuan-Fu Liao","raw_affiliation_strings":["Department of Electronic Engineering, National Taipei University of Technology, Taipei, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electronic Engineering, National Taipei University of Technology, Taipei, Taiwan","institution_ids":["https://openalex.org/I118292597"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088864528","display_name":"Tai-Shih Chi","orcid":"https://orcid.org/0000-0002-0584-8399"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Tai-Shih Chi","raw_affiliation_strings":["Department of Electrical and Computer Engineering, National Chiao Tung University, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, National Chiao Tung University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I148366613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1663,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.42175585,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"17","issue":null,"first_page":"5179","last_page":"5183"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7676896452903748},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7324004173278809},{"id":"https://openalex.org/keywords/computational-auditory-scene-analysis","display_name":"Computational auditory scene analysis","score":0.6378222703933716},{"id":"https://openalex.org/keywords/auditory-cortex","display_name":"Auditory cortex","score":0.6292312145233154},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5807361602783203},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5465589761734009},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5104633569717407},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4937454164028168},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.48174145817756653},{"id":"https://openalex.org/keywords/auditory-system","display_name":"Auditory system","score":0.4578278660774231},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.4155530333518982},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3784312605857849},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3692642152309418},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3234984576702118},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.18021273612976074},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.07284995913505554}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7676896452903748},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7324004173278809},{"id":"https://openalex.org/C73208851","wikidata":"https://www.wikidata.org/wiki/Q5157303","display_name":"Computational auditory scene analysis","level":2,"score":0.6378222703933716},{"id":"https://openalex.org/C2780297895","wikidata":"https://www.wikidata.org/wiki/Q18676","display_name":"Auditory cortex","level":2,"score":0.6292312145233154},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5807361602783203},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5465589761734009},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5104633569717407},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4937454164028168},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.48174145817756653},{"id":"https://openalex.org/C2777443451","wikidata":"https://www.wikidata.org/wiki/Q821413","display_name":"Auditory system","level":2,"score":0.4578278660774231},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.4155530333518982},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3784312605857849},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3692642152309418},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3234984576702118},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.18021273612976074},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.07284995913505554},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2018.8462690","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2018.8462690","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W170247711","https://openalex.org/W1533416326","https://openalex.org/W1974387177","https://openalex.org/W2001571181","https://openalex.org/W2006129368","https://openalex.org/W2039554088","https://openalex.org/W2054139811","https://openalex.org/W2062789884","https://openalex.org/W2142709321","https://openalex.org/W2291877678","https://openalex.org/W2296167893","https://openalex.org/W2402362233","https://openalex.org/W2542605056","https://openalex.org/W2718701456","https://openalex.org/W6740130871"],"related_works":["https://openalex.org/W1974374655","https://openalex.org/W2077650285","https://openalex.org/W3156212426","https://openalex.org/W3169261136","https://openalex.org/W2084000818","https://openalex.org/W2953132115","https://openalex.org/W2360086260","https://openalex.org/W2786424703","https://openalex.org/W2063103719","https://openalex.org/W3090629579"],"abstract_inverted_index":{"Before":[0],"the":[1,4,32,38,71,77,81,84,90,94,111,128],"era":[2],"of":[3,34,68,73,86],"neural":[5],"network":[6],"(NN),":[7],"features":[8],"extracted":[9],"from":[10],"auditory":[11,35,56,65,95,112],"models":[12,36],"have":[13],"been":[14,21],"applied":[15],"to":[16,59],"various":[17],"speech":[18,61],"applications":[19],"and":[20,83],"demonstrated":[22],"more":[23,118],"robust":[24,119],"against":[25,120],"noise":[26],"than":[27,127],"conventional":[28],"speech-processing":[29],"features.":[30],"What's":[31],"role":[33],"in":[37,76,89,101,123,131],"current":[39],"NN":[40,52,98,115,130],"era?":[41],"Are":[42],"they":[43],"obsolete?":[44],"To":[45],"answer":[46],"this":[47],"question,":[48],"we":[49],"construct":[50],"a":[51,54,102],"with":[53],"generative":[55,64],"model":[57,66,113],"embedded":[58,114],"process":[60],"signals.":[62],"The":[63,97],"consists":[67],"two":[69],"stages,":[70],"stage":[72,85],"spectrum":[74],"estimation":[75],"logarithmic-frequency":[78],"axis":[79],"by":[80,93],"cochlea":[82],"spectral-temporal":[87],"analysis":[88],"modulation":[91],"domain":[92],"cortex.":[96],"is":[99,116],"evaluated":[100],"simple":[103],"speaker":[104,132],"identification":[105],"task.":[106],"Experiment":[107],"results":[108],"show":[109],"that":[110],"still":[117],"noise,":[121],"especially":[122],"low":[124],"SNR":[125],"conditions,":[126],"randomly-initialized":[129],"identification.":[133]},"counts_by_year":[{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
