{"id":"https://openalex.org/W2076502758","doi":"https://doi.org/10.1109/taslp.2015.2456420","title":"An Auditory Inspired Amplitude Modulation Filter Bank for Robust Feature Extraction in Automatic Speech Recognition","display_name":"An Auditory Inspired Amplitude Modulation Filter Bank for Robust Feature Extraction in Automatic Speech Recognition","publication_year":2015,"publication_date":"2015-07-14","ids":{"openalex":"https://openalex.org/W2076502758","doi":"https://doi.org/10.1109/taslp.2015.2456420","mag":"2076502758"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2015.2456420","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2015.2456420","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071286568","display_name":"Niko Moritz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210138578","display_name":"Fraunhofer Institute for Digital Media Technology","ror":"https://ror.org/04gp0yb49","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210138578","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Niko Moritz","raw_affiliation_strings":["Fraunhofer Institute for Digital Media Technology (IDMT), Project Group for Hearing, Speech, and Audio technology (HSA), Oldenburg, Germany","Project Group for Hearing, Speech, and Audio technology (HSA), Fraunhofer Institute for Digital Media Technology (IDMT), Oldenburg, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Digital Media Technology (IDMT), Project Group for Hearing, Speech, and Audio technology (HSA), Oldenburg, Germany","institution_ids":["https://openalex.org/I4210138578"]},{"raw_affiliation_string":"Project Group for Hearing, Speech, and Audio technology (HSA), Fraunhofer Institute for Digital Media Technology (IDMT), Oldenburg, Germany","institution_ids":["https://openalex.org/I4210138578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009092572","display_name":"J\u00f6rn Anem\u00fcller","orcid":"https://orcid.org/0000-0001-5564-5795"},"institutions":[{"id":"https://openalex.org/I129877168","display_name":"Carl von Ossietzky Universit\u00e4t Oldenburg","ror":"https://ror.org/033n9gh91","country_code":"DE","type":"education","lineage":["https://openalex.org/I129877168"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jorn Anemuller","raw_affiliation_strings":["University of Oldenburg, medical physics, Oldenburg, Germany"],"affiliations":[{"raw_affiliation_string":"University of Oldenburg, medical physics, Oldenburg, Germany","institution_ids":["https://openalex.org/I129877168"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076869895","display_name":"Birger Kollmeier","orcid":"https://orcid.org/0000-0001-8584-4779"},"institutions":[{"id":"https://openalex.org/I129877168","display_name":"Carl von Ossietzky Universit\u00e4t Oldenburg","ror":"https://ror.org/033n9gh91","country_code":"DE","type":"education","lineage":["https://openalex.org/I129877168"]},{"id":"https://openalex.org/I4210138578","display_name":"Fraunhofer Institute for Digital Media Technology","ror":"https://ror.org/04gp0yb49","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210138578","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Birger Kollmeier","raw_affiliation_strings":["Fraunhofer Institute for Digital Media Technology (IDMT), Project Group for Hearing, Speech, and Audio technology (HSA), Oldenburg, Germany","University of Oldenburg, medical physics, Oldenburg, Germany","Project Group for Hearing, Speech, and Audio technology (HSA), Fraunhofer Institute for Digital Media Technology (IDMT), Oldenburg, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer Institute for Digital Media Technology (IDMT), Project Group for Hearing, Speech, and Audio technology (HSA), Oldenburg, Germany","institution_ids":["https://openalex.org/I4210138578"]},{"raw_affiliation_string":"University of Oldenburg, medical physics, Oldenburg, Germany","institution_ids":["https://openalex.org/I129877168"]},{"raw_affiliation_string":"Project Group for Hearing, Speech, and Audio technology (HSA), Fraunhofer Institute for Digital Media Technology (IDMT), Oldenburg, Germany","institution_ids":["https://openalex.org/I4210138578"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5071286568"],"corresponding_institution_ids":["https://openalex.org/I4210138578"],"apc_list":null,"apc_paid":null,"fwci":4.8822,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.95802239,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"23","issue":"11","first_page":"1926","last_page":"1937"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/filter-bank","display_name":"Filter bank","score":0.7366589307785034},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6999022364616394},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6716605424880981},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.5653256773948669},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5275055170059204},{"id":"https://openalex.org/keywords/amplitude-modulation","display_name":"Amplitude modulation","score":0.5116837024688721},{"id":"https://openalex.org/keywords/reverberation","display_name":"Reverberation","score":0.511368453502655},{"id":"https://openalex.org/keywords/modulation","display_name":"Modulation (music)","score":0.4838794469833374},{"id":"https://openalex.org/keywords/psychoacoustics","display_name":"Psychoacoustics","score":0.4517490267753601},{"id":"https://openalex.org/keywords/frequency-modulation","display_name":"Frequency modulation","score":0.438480406999588},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4383801221847534},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.4362420439720154},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.40179532766342163},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.381203830242157},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.3268475830554962},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.31830012798309326},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1211322546005249},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.12068226933479309},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.116138756275177},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.07671046257019043}],"concepts":[{"id":"https://openalex.org/C100515483","wikidata":"https://www.wikidata.org/wiki/Q3268235","display_name":"Filter bank","level":3,"score":0.7366589307785034},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6999022364616394},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6716605424880981},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.5653256773948669},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5275055170059204},{"id":"https://openalex.org/C201905106","wikidata":"https://www.wikidata.org/wiki/Q183755","display_name":"Amplitude modulation","level":4,"score":0.5116837024688721},{"id":"https://openalex.org/C95851461","wikidata":"https://www.wikidata.org/wiki/Q468809","display_name":"Reverberation","level":2,"score":0.511368453502655},{"id":"https://openalex.org/C123079801","wikidata":"https://www.wikidata.org/wiki/Q750240","display_name":"Modulation (music)","level":2,"score":0.4838794469833374},{"id":"https://openalex.org/C9940772","wikidata":"https://www.wikidata.org/wiki/Q557399","display_name":"Psychoacoustics","level":3,"score":0.4517490267753601},{"id":"https://openalex.org/C11930861","wikidata":"https://www.wikidata.org/wiki/Q181417","display_name":"Frequency modulation","level":3,"score":0.438480406999588},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4383801221847534},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4362420439720154},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.40179532766342163},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.381203830242157},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.3268475830554962},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.31830012798309326},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1211322546005249},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.12068226933479309},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.116138756275177},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.07671046257019043},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2015.2456420","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2015.2456420","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:publica.fraunhofer.de:publica/241424","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/241424","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5699999928474426,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320879","display_name":"Deutsche Forschungsgemeinschaft","ror":"https://ror.org/018mejw64"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W98857008","https://openalex.org/W154677192","https://openalex.org/W164832740","https://openalex.org/W1524333225","https://openalex.org/W1627087495","https://openalex.org/W1972168487","https://openalex.org/W1975138161","https://openalex.org/W1989314204","https://openalex.org/W1990934990","https://openalex.org/W1991473673","https://openalex.org/W1999686891","https://openalex.org/W2012897754","https://openalex.org/W2018833920","https://openalex.org/W2041638389","https://openalex.org/W2046331056","https://openalex.org/W2046343773","https://openalex.org/W2052667477","https://openalex.org/W2068349492","https://openalex.org/W2068611059","https://openalex.org/W2072517993","https://openalex.org/W2075012882","https://openalex.org/W2076864159","https://openalex.org/W2082803386","https://openalex.org/W2088489891","https://openalex.org/W2090861223","https://openalex.org/W2096051479","https://openalex.org/W2101596234","https://openalex.org/W2103387126","https://openalex.org/W2108921739","https://openalex.org/W2110798204","https://openalex.org/W2115515280","https://openalex.org/W2126048084","https://openalex.org/W2130722890","https://openalex.org/W2137075158","https://openalex.org/W2151484683","https://openalex.org/W2152483743","https://openalex.org/W2159373586","https://openalex.org/W2165712214","https://openalex.org/W2166067691","https://openalex.org/W2167763959","https://openalex.org/W2334904945","https://openalex.org/W2399359657","https://openalex.org/W2400691855","https://openalex.org/W6631362777","https://openalex.org/W6676481782","https://openalex.org/W6702735670"],"related_works":["https://openalex.org/W2766680336","https://openalex.org/W2133320490","https://openalex.org/W4289830142","https://openalex.org/W3150393935","https://openalex.org/W1497065097","https://openalex.org/W2125446021","https://openalex.org/W2980055100","https://openalex.org/W2186790562","https://openalex.org/W2548564146","https://openalex.org/W2055480715"],"abstract_inverted_index":{"The":[0,92,154],"human":[1,124],"ability":[2],"to":[3,11,39,69,120,175],"classify":[4],"acoustic":[5,33],"sounds":[6],"is":[7,44,50,82,223],"still":[8],"unmatched":[9],"compared":[10,174],"recent":[12,177],"methods":[13],"in":[14,52,63,89,137],"machine":[15],"learning.":[16],"Psychoacoustic":[17],"and":[18,34,102,170,185,201,217,232],"physiological":[19],"studies":[20],"indicate":[21,130],"that":[22,46,81,116,143],"the":[23,60,96,131,144,167,205],"auditory":[24,61],"system":[25,62],"of":[26,59,95,113,213,230],"mammals":[27],"decomposes":[28],"audio":[29],"signals":[30],"into":[31],"their":[32],"modulation":[35,77,103,128,139],"frequency":[36,104],"components":[37],"prior":[38],"further":[40],"analysis.":[41],"Since":[42],"it":[43],"known":[45],"most":[47],"linguistic":[48],"information":[49,136],"coded":[51],"amplitude":[53,76,138],"fluctuations,":[54],"mimicking":[55],"temporal":[56],"processing":[57],"strategies":[58],"automatic":[64],"speech":[65],"recognition":[66,71],"(ASR)":[67],"promises":[68],"increase":[70],"accuracies.":[72],"We":[73,141],"present":[74],"an":[75,148,210],"filter":[78,145],"bank":[79],"(AMFB)":[80],"used":[83],"as":[84,182],"a":[85,109,227],"feature":[86,156,178],"extraction":[87,157,179],"scheme":[88,158],"ASR":[90,152,172],"systems.":[91],"time-frequency":[93],"resolution":[94],"employed":[97],"FIR":[98],"filters,":[99],"i.e.,":[100],"bandwidth":[101],"settings,":[105],"are":[106],"adopted":[107],"from":[108,123],"psychophysically":[110],"inspired":[111],"model":[112],"Dau":[114],"(1997)":[115],"was":[117],"originally":[118],"proposed":[119,155],"describe":[121],"data":[122],"psychoacoustics.":[125],"Investigations":[126],"on":[127,151],"phase":[129],"need":[132],"for":[133,208],"preserving":[134],"such":[135,181],"features.":[140,187],"show":[142],"symmetry":[146],"has":[147],"important":[149],"impact":[150],"performance.":[153],"exhibits":[159],"significant":[160],"word":[161],"error":[162],"rate":[163],"(WER)":[164],"reductions":[165],"using":[166],"Aurora-2,":[168],"Aurora-4,":[169],"REVERB":[171],"tasks":[173],"other":[176],"methods,":[180],"MFCC,":[183],"FDLP,":[184],"PNCC":[186],"Thereby,":[188],"AMFB":[189],"features":[190,222],"reveal":[191],"high":[192],"robustness":[193],"against":[194],"additive":[195],"noise,":[196],"different":[197],"transmission":[198],"channel":[199],"characteristics,":[200],"room":[202],"reverberation.":[203],"Using":[204],"Aurora-4":[206],"benchmark,":[207],"instance,":[209],"average":[211],"WER":[212],"12.33%":[214],"with":[215,219],"raw":[216,235],"11.31%":[218],"bottleneck":[220],"transformed":[221],"attained,":[224],"which":[225],"constitutes":[226],"relative":[228],"improvement":[229],"19.6%":[231],"29.2%":[233],"over":[234],"MFCC":[236],"features,":[237],"respectively.":[238]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":7},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
