{"id":"https://openalex.org/W2288217446","doi":"https://doi.org/10.1109/asru.2015.7404770","title":"Speaker location and microphone spacing invariant acoustic modeling from raw multichannel waveforms","display_name":"Speaker location and microphone spacing invariant acoustic modeling from raw multichannel waveforms","publication_year":2015,"publication_date":"2015-12-01","ids":{"openalex":"https://openalex.org/W2288217446","doi":"https://doi.org/10.1109/asru.2015.7404770","mag":"2288217446"},"language":"en","primary_location":{"id":"doi:10.1109/asru.2015.7404770","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2015.7404770","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070513394","display_name":"Tara N. Sainath","orcid":"https://orcid.org/0000-0002-4126-6556"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tara N. Sainath","raw_affiliation_strings":["Google, Inc., New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc., New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103273436","display_name":"Ron J. Weiss","orcid":"https://orcid.org/0000-0003-2010-4053"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ron J. Weiss","raw_affiliation_strings":["Google, Inc., New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc., New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087313939","display_name":"Kevin Wilson","orcid":"https://orcid.org/0000-0001-9141-2219"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin W. Wilson","raw_affiliation_strings":["Google, Inc., New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc., New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000078382","display_name":"Arun Narayanan","orcid":"https://orcid.org/0009-0008-3325-8928"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arun Narayanan","raw_affiliation_strings":["Google, Inc., New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc., New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049614700","display_name":"Michiel Bacchiani","orcid":"https://orcid.org/0000-0003-4527-0197"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michiel Bacchiani","raw_affiliation_strings":["Google, Inc., New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, Inc., New York, NY, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109495890","display_name":"Andrew","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew","raw_affiliation_strings":["Google Inc, Mountain View, CA, US"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Inc, Mountain View, CA, US","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"30","last_page":"36"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7832028269767761},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.7019385695457458},{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.6802501678466797},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6405895352363586},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5905174612998962},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5284568667411804},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5240921378135681},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5188871026039124},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.47972172498703003},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4534587264060974},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.3836577832698822},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3301442265510559},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2825896739959717},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11960181593894958},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.09400957822799683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7832028269767761},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.7019385695457458},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.6802501678466797},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6405895352363586},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5905174612998962},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5284568667411804},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5240921378135681},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5188871026039124},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.47972172498703003},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4534587264060974},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.3836577832698822},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3301442265510559},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2825896739959717},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11960181593894958},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.09400957822799683},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C146978453","wikidata":"https://www.wikidata.org/wiki/Q3798668","display_name":"Aerospace engineering","level":1,"score":0.0},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C554190296","wikidata":"https://www.wikidata.org/wiki/Q47528","display_name":"Radar","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru.2015.7404770","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru.2015.7404770","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1515932869","https://openalex.org/W1533861849","https://openalex.org/W1542280630","https://openalex.org/W1600744878","https://openalex.org/W1831449718","https://openalex.org/W2035846950","https://openalex.org/W2060108923","https://openalex.org/W2061074721","https://openalex.org/W2112739286","https://openalex.org/W2117678320","https://openalex.org/W2119203697","https://openalex.org/W2121573996","https://openalex.org/W2130361043","https://openalex.org/W2168231600","https://openalex.org/W2172097686","https://openalex.org/W2293634267","https://openalex.org/W2398826216","https://openalex.org/W2616139854","https://openalex.org/W2962719052","https://openalex.org/W4249052411","https://openalex.org/W4297801963","https://openalex.org/W6631943919","https://openalex.org/W6638670064","https://openalex.org/W6684859321","https://openalex.org/W6696934422","https://openalex.org/W6712560600"],"related_works":["https://openalex.org/W1879255185","https://openalex.org/W2769861442","https://openalex.org/W2120442551","https://openalex.org/W1980506188","https://openalex.org/W2900122540","https://openalex.org/W4240587264","https://openalex.org/W2011788874","https://openalex.org/W2041060376","https://openalex.org/W2963983801","https://openalex.org/W3119734852"],"abstract_inverted_index":{"Multichannel":[0],"ASR":[1],"systems":[2],"commonly":[3],"use":[4],"separate":[5],"modules":[6],"to":[7,21,68,71,119],"perform":[8],"speech":[9],"enhancement":[10,24],"and":[11,80],"acoustic":[12,28],"modeling.":[13],"In":[14],"this":[15],"paper,":[16],"we":[17,97],"present":[18],"an":[19],"algorithm":[20],"do":[22],"multichannel":[23],"jointly":[25],"with":[26],"the":[27,64,77,93],"model,":[29],"using":[30,107],"a":[31,54,85,102,114,120],"raw":[32],"waveform":[33],"convolutional":[34],"LSTM":[35],"deep":[36],"neural":[37],"network":[38,66,103],"(CLDNN).":[39],"We":[40],"will":[41],"show":[42,98],"that":[43,63,87,99,116],"our":[44],"proposed":[45,65],"method":[46],"offers":[47],"~5%":[48],"relative":[49],"improvement":[50],"in":[51,113],"WER":[52],"over":[53],"log-mel":[55],"CLDNN":[56],"trained":[57],"on":[58,104],"multiple":[59,108],"channels.":[60],"Analysis":[61],"shows":[62],"learns":[67],"be":[69],"robust":[70,118],"varying":[72],"angles":[73],"of":[74,92,122],"arrival":[75],"for":[76],"target":[78],"speaker,":[79],"performs":[81],"as":[82,84],"well":[83],"model":[86,115],"is":[88,117],"given":[89],"oracle":[90],"knowledge":[91],"true":[94],"location.":[95],"Finally,":[96],"training":[100],"such":[101],"inputs":[105],"captured":[106],"(linear)":[109],"array":[110],"configurations":[111],"results":[112],"range":[121],"microphone":[123],"spacings.":[124]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":12},{"year":2016,"cited_by_count":5}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}