{"id":"https://openalex.org/W3168719366","doi":"https://doi.org/10.1109/iwaenc53105.2022.9914704","title":"Do You Listen with one or two Microphones? A Unified ASR Model for Single and Multi-Channel Audio","display_name":"Do You Listen with one or two Microphones? A Unified ASR Model for Single and Multi-Channel Audio","publication_year":2022,"publication_date":"2022-09-05","ids":{"openalex":"https://openalex.org/W3168719366","doi":"https://doi.org/10.1109/iwaenc53105.2022.9914704","mag":"3168719366"},"language":"en","primary_location":{"id":"doi:10.1109/iwaenc53105.2022.9914704","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwaenc53105.2022.9914704","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Workshop on Acoustic Signal Enhancement (IWAENC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026246923","display_name":"Gokce Keskin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Gokce Keskin","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060740546","display_name":"Minhua Wu","orcid":"https://orcid.org/0000-0001-9798-2984"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Minhua Wu","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027102522","display_name":"Brian King","orcid":"https://orcid.org/0000-0002-5300-5564"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Brian King","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030389494","display_name":"Harish Mallidi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Harish Mallidi","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076706630","display_name":"Yang Gao","orcid":"https://orcid.org/0000-0002-3282-1618"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Yang Gao","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012153296","display_name":"Jasha Droppo","orcid":"https://orcid.org/0000-0001-6097-0090"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jasha Droppo","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110230355","display_name":"Ariya Rastrow","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ariya Rastrow","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090454997","display_name":"Roland Maas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Roland Maas","raw_affiliation_strings":["Amazon.com"],"affiliations":[{"raw_affiliation_string":"Amazon.com","institution_ids":["https://openalex.org/I4210089985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5026246923"],"corresponding_institution_ids":["https://openalex.org/I4210089985"],"apc_list":null,"apc_paid":null,"fwci":0.19495729,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.34832311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.829511284828186},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5886233448982239},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5613021850585938},{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.5145039558410645},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5127221345901489},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4487732946872711},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4391530156135559},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10653871297836304}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.829511284828186},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5886233448982239},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5613021850585938},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.5145039558410645},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5127221345901489},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4487732946872711},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4391530156135559},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10653871297836304},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iwaenc53105.2022.9914704","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iwaenc53105.2022.9914704","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Workshop on Acoustic Signal Enhancement (IWAENC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W81726370","https://openalex.org/W102482883","https://openalex.org/W330298975","https://openalex.org/W2041768951","https://openalex.org/W2113556376","https://openalex.org/W2127141656","https://openalex.org/W2242685705","https://openalex.org/W2291522532","https://openalex.org/W2334833135","https://openalex.org/W2398972335","https://openalex.org/W2515439472","https://openalex.org/W2539797148","https://openalex.org/W2589857635","https://openalex.org/W2600628583","https://openalex.org/W2890553422","https://openalex.org/W2891997645","https://openalex.org/W2921496354","https://openalex.org/W2962760690","https://openalex.org/W2972430654","https://openalex.org/W3038604153","https://openalex.org/W3086154751"],"related_works":["https://openalex.org/W2944823289","https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W3037018281","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2003209439","https://openalex.org/W2913146933","https://openalex.org/W4321854979","https://openalex.org/W2358319515"],"abstract_inverted_index":{"Automatic":[0],"speech":[1],"recognition":[2],"(ASR)":[3],"models":[4],"are":[5],"typically":[6,115],"designed":[7],"to":[8,68,154,161,167,180,185],"operate":[9],"on":[10,57],"a":[11,17,24,69,73,93,107,111,117,136,162,186],"single":[12,18,118],"input":[13,32,144],"data":[14,33,43,62],"type,":[15],"e.g.":[16],"or":[19],"multi-channel":[20],"audio":[21,120,145],"streamed":[22],"from":[23],"device.":[25],"This":[26],"design":[27],"decision":[28],"assumes":[29],"the":[30],"primary":[31,59,119],"source":[34,44],"does":[35],"not":[36],"change":[37],"and":[38,60,72,81,122,165],"if":[39],"an":[40],"additional":[41,124],"(auxiliary)":[42],"is":[45,85],"occasionally":[46],"available,":[47],"it":[48],"cannot":[49],"be":[50],"used.":[51],"An":[52],"ASR":[53,95],"model":[54,74,96],"that":[55,75,97,139],"operates":[56],"both":[58,78,100,141],"auxiliary":[61,125],"can":[63,76,98],"achieve":[64],"better":[65],"accuracy":[66],"compared":[67,160,184],"primary-only":[70,79],"solution;":[71],"serve":[77,99],"(PO)":[80],"primary-plus-auxiliary":[82],"(PPA)":[83],"modes":[84],"highly":[86],"desirable.":[87],"In":[88],"this":[89],"work,":[90],"we":[91],"propose":[92],"unified":[94],"modes.":[101],"We":[102],"demonstrate":[103],"its":[104],"efficacy":[105],"in":[106,171],"realistic":[108],"scenario":[109],"where":[110],"set":[112],"of":[113,143],"devices":[114],"stream":[116],"channel,":[121],"two":[123],"channels":[126],"only":[127],"when":[128],"upload":[129],"bandwidth":[130],"allows":[131],"it.":[132],"The":[133,174],"architecture":[134],"enables":[135],"unique":[137,175],"methodology":[138,177],"uses":[140],"types":[142],"during":[146],"training":[147,176],"time.":[148],"Our":[149],"proposed":[150],"approach":[151],"achieves":[152,178],"up":[153,166,179],"12.5%":[155],"relative":[156,169,182],"word-error-rate":[157],"reduction":[158],"(WERR)":[159],"PO":[163],"baseline,":[164],"16.0%":[168],"WERR":[170,183],"low-SNR":[172],"conditions.":[173],"2.5%":[181],"PPA":[187],"baseline.":[188]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
