{"id":"https://openalex.org/W3215167351","doi":"https://doi.org/10.1109/asru51503.2021.9687942","title":"A Conformer-Based ASR Frontend for Joint Acoustic Echo Cancellation, Speech Enhancement and Speech Separation","display_name":"A Conformer-Based ASR Frontend for Joint Acoustic Echo Cancellation, Speech Enhancement and Speech Separation","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W3215167351","doi":"https://doi.org/10.1109/asru51503.2021.9687942","mag":"3215167351"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9687942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687942","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2111.09935","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042631153","display_name":"Tom O\u2019Malley","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tom O'Malley","raw_affiliation_strings":["Google LLC,U.S.A"],"affiliations":[{"raw_affiliation_string":"Google LLC,U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000078382","display_name":"Arun Narayanan","orcid":"https://orcid.org/0009-0008-3325-8928"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arun Narayanan","raw_affiliation_strings":["Google LLC,U.S.A"],"affiliations":[{"raw_affiliation_string":"Google LLC,U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108047863","display_name":"Quan Wang","orcid":"https://orcid.org/0000-0001-5483-0243"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Quan Wang","raw_affiliation_strings":["Google LLC,U.S.A"],"affiliations":[{"raw_affiliation_string":"Google LLC,U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111429885","display_name":"Alex Park","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Park","raw_affiliation_strings":["Google LLC,U.S.A"],"affiliations":[{"raw_affiliation_string":"Google LLC,U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101928782","display_name":"James S. Walker","orcid":"https://orcid.org/0000-0002-7184-584X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Walker","raw_affiliation_strings":["Google LLC,U.S.A"],"affiliations":[{"raw_affiliation_string":"Google LLC,U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060708515","display_name":"N. T. Howard","orcid":"https://orcid.org/0000-0002-8787-6309"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan Howard","raw_affiliation_strings":["Google LLC,U.S.A"],"affiliations":[{"raw_affiliation_string":"Google LLC,U.S.A","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5042631153"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.1721,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.3654105,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":93},"biblio":{"volume":null,"issue":null,"first_page":"304","last_page":"311"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7448105812072754},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7443621158599854},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6022388935089111},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.5631170868873596},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.5362638235092163},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.4908347725868225},{"id":"https://openalex.org/keywords/echo","display_name":"Echo (communications protocol)","score":0.4480116367340088},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.43694978952407837},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32151180505752563},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.31037667393684387}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7448105812072754},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7443621158599854},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6022388935089111},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5631170868873596},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.5362638235092163},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.4908347725868225},{"id":"https://openalex.org/C2779426996","wikidata":"https://www.wikidata.org/wiki/Q18389128","display_name":"Echo (communications protocol)","level":2,"score":0.4480116367340088},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.43694978952407837},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32151180505752563},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.31037667393684387},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/asru51503.2021.9687942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9687942","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2111.09935","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.09935","pdf_url":"https://arxiv.org/pdf/2111.09935","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3215167351","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2111.09935","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2111.09935","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2111.09935","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2111.09935","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2111.09935","pdf_url":"https://arxiv.org/pdf/2111.09935","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6100000143051147,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3215167351.pdf","grobid_xml":"https://content.openalex.org/works/W3215167351.grobid-xml"},"referenced_works_count":72,"referenced_works":["https://openalex.org/W48610814","https://openalex.org/W123803282","https://openalex.org/W580085951","https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W2042141988","https://openalex.org/W2062164080","https://openalex.org/W2065804682","https://openalex.org/W2101045344","https://openalex.org/W2132882255","https://openalex.org/W2136439176","https://openalex.org/W2141411743","https://openalex.org/W2221409856","https://openalex.org/W2402144811","https://openalex.org/W2460742184","https://openalex.org/W2473329891","https://openalex.org/W2550493152","https://openalex.org/W2559809918","https://openalex.org/W2617258110","https://openalex.org/W2627092829","https://openalex.org/W2696967604","https://openalex.org/W2734774145","https://openalex.org/W2739427748","https://openalex.org/W2748545504","https://openalex.org/W2749510669","https://openalex.org/W2750499125","https://openalex.org/W2884797218","https://openalex.org/W2888860279","https://openalex.org/W2889013998","https://openalex.org/W2928941594","https://openalex.org/W2936774411","https://openalex.org/W2937769560","https://openalex.org/W2953932601","https://openalex.org/W2962788625","https://openalex.org/W2963122170","https://openalex.org/W2963431393","https://openalex.org/W2963827914","https://openalex.org/W2963921132","https://openalex.org/W2964238697","https://openalex.org/W2972461400","https://openalex.org/W2973062255","https://openalex.org/W3008181812","https://openalex.org/W3008880747","https://openalex.org/W3015191643","https://openalex.org/W3015194534","https://openalex.org/W3015746570","https://openalex.org/W3025165719","https://openalex.org/W3044278286","https://openalex.org/W3048487650","https://openalex.org/W3095248373","https://openalex.org/W3097777922","https://openalex.org/W3143332423","https://openalex.org/W3160766462","https://openalex.org/W3162646409","https://openalex.org/W3163842642","https://openalex.org/W3197845638","https://openalex.org/W3211065263","https://openalex.org/W4226191785","https://openalex.org/W6629717138","https://openalex.org/W6631190155","https://openalex.org/W6713134421","https://openalex.org/W6738243166","https://openalex.org/W6739366949","https://openalex.org/W6740674931","https://openalex.org/W6741807409","https://openalex.org/W6743244400","https://openalex.org/W6743440867","https://openalex.org/W6743590492","https://openalex.org/W6774687970","https://openalex.org/W6776048756","https://openalex.org/W6780404847","https://openalex.org/W6781924587"],"related_works":[],"abstract_inverted_index":{"We":[0,106],"present":[1,107],"a":[2,18,34,51,65,136],"frontend":[3],"for":[4,61,71,100],"improving":[5],"robustness":[6],"of":[7,44,47,54,83,87],"automatic":[8],"speech":[9,24,27,72,95,104],"recognition":[10],"(ASR),":[11],"that":[12,39,112],"jointly":[13],"implements":[14],"three":[15],"modules":[16],"within":[17,187],"single":[19],"model:":[20],"acoustic":[21],"echo":[22,62,101,165,191],"cancellation,":[23],"enhancement,":[25],"and":[26,74,103,124,173,199],"separation.":[28],"This":[29],"is":[30,59,69,90],"achieved":[31],"by":[32,159],"using":[33,135],"contextual":[35],"enhancement":[36],"neural":[37],"network":[38],"can":[40],"optionally":[41],"make":[42],"use":[43],"different":[45],"types":[46],"side":[48],"inputs:":[49],"(1)":[50],"reference":[52],"signal":[53],"the":[55,80,84,113,121,143,146,150,183,196,202],"playback":[56],"audio,":[57],"which":[58,68,89],"necessary":[60],"cancellation;":[63],"(2)":[64],"noise":[66],"context,":[67],"useful":[70],"enhancement;":[73],"(3)":[75],"an":[76],"embedding":[77],"vector":[78],"representing":[79],"voice":[81],"characteristic":[82],"target":[85],"speaker":[86],"interest,":[88],"not":[91],"only":[92],"critical":[93],"in":[94,130,154],"separation,":[96],"but":[97],"also":[98],"helpful":[99],"cancellation":[102,166,192],"enhancement.":[105],"detailed":[108],"evaluations":[109],"to":[110,142,180],"show":[111],"joint":[114,147,184],"model":[115,148,185],"performs":[116,186],"almost":[117],"as":[118,120],"well":[119],"task-specific":[122,181],"models,":[123,182],"significantly":[125],"reduces":[126,149],"word":[127,151],"error":[128,152],"rate":[129,153],"noisy":[131,144,171,197],"conditions":[132,158],"even":[133],"when":[134],"large-scale":[137],"state-of-the-art":[138],"ASR":[139],"model.":[140],"Compared":[141,179],"baseline,":[145],"low":[155],"signal-to-noise":[156],"ratio":[157],"at":[160],"least":[161],"71%":[162],"on":[163,169,175,189,195,201],"our":[164,170,176,190],"dataset,":[167,172,193,198],"10%":[168,188],"26%":[174],"multi-speaker":[177,203],"dataset.":[178,204],"2%":[194],"3%":[200]},"counts_by_year":[{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2021-12-06T00:00:00"}
