{"id":"https://openalex.org/W4210663600","doi":"https://doi.org/10.1109/asru51503.2021.9688251","title":"A Study of Transducer Based End-to-End ASR with ESPnet: Architecture, Auxiliary Loss and Decoding Strategies","display_name":"A Study of Transducer Based End-to-End ASR with ESPnet: Architecture, Auxiliary Loss and Decoding Strategies","publication_year":2021,"publication_date":"2021-12-13","ids":{"openalex":"https://openalex.org/W4210663600","doi":"https://doi.org/10.1109/asru51503.2021.9688251"},"language":"en","primary_location":{"id":"doi:10.1109/asru51503.2021.9688251","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688251","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104138472","display_name":"Florian Boyer","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I15057530","display_name":"Universit\u00e9 de Bordeaux","ror":"https://ror.org/057qpr032","country_code":"FR","type":"education","lineage":["https://openalex.org/I15057530"]},{"id":"https://openalex.org/I4210142254","display_name":"Laboratoire Bordelais de Recherche en Informatique","ror":"https://ror.org/03adqg323","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I15057530","https://openalex.org/I4210142254","https://openalex.org/I4210159245","https://openalex.org/I4210160189"]},{"id":"https://openalex.org/I4210160189","display_name":"Institut Polytechnique de Bordeaux","ror":"https://ror.org/054qv7y42","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210160189"]},{"id":"https://openalex.org/I77216272","display_name":"NOAA Air Resources Laboratory","ror":"https://ror.org/0061q5f61","country_code":"US","type":"government","lineage":["https://openalex.org/I1308126019","https://openalex.org/I1343035065","https://openalex.org/I2802992173","https://openalex.org/I77216272"]}],"countries":["FR","US"],"is_corresponding":true,"raw_author_name":"Florian Boyer","raw_affiliation_strings":["Airudit, Speech Lab","LaBRI, Bordeaux INP, CNRS, UMR 5800"],"affiliations":[{"raw_affiliation_string":"Airudit, Speech Lab","institution_ids":["https://openalex.org/I77216272"]},{"raw_affiliation_string":"LaBRI, Bordeaux INP, CNRS, UMR 5800","institution_ids":["https://openalex.org/I4210160189","https://openalex.org/I4210142254","https://openalex.org/I15057530","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109365728","display_name":"Yusuke Shinohara","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yusuke Shinohara","raw_affiliation_strings":["Yahoo Japan Corporation"],"affiliations":[{"raw_affiliation_string":"Yahoo Japan Corporation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007884618","display_name":"Takaaki Ishii","orcid":"https://orcid.org/0000-0001-6690-7834"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takaaki Ishii","raw_affiliation_strings":["Yahoo Japan Corporation"],"affiliations":[{"raw_affiliation_string":"Yahoo Japan Corporation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040282669","display_name":"Hirofumi Inaguma","orcid":"https://orcid.org/0000-0003-0610-1251"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirofumi Inaguma","raw_affiliation_strings":["Kyoto University"],"affiliations":[{"raw_affiliation_string":"Kyoto University","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University","Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5104138472"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I15057530","https://openalex.org/I4210142254","https://openalex.org/I4210160189","https://openalex.org/I77216272"],"apc_list":null,"apc_paid":null,"fwci":2.1566,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.90478832,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"16","last_page":"23"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.8608168363571167},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7581332921981812},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7101410627365112},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6587642431259155},{"id":"https://openalex.org/keywords/proposition","display_name":"Proposition","score":0.4375486373901367},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.363170325756073},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3430059552192688},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.33867812156677246},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14899137616157532},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10974308848381042},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.09127414226531982}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.8608168363571167},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7581332921981812},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7101410627365112},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6587642431259155},{"id":"https://openalex.org/C2777152325","wikidata":"https://www.wikidata.org/wiki/Q108163","display_name":"Proposition","level":2,"score":0.4375486373901367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.363170325756073},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3430059552192688},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.33867812156677246},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14899137616157532},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10974308848381042},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.09127414226531982},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru51503.2021.9688251","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru51503.2021.9688251","pdf_url":null,"source":{"id":"https://openalex.org/S4363606113","display_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.6600000262260437,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":58,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1828163288","https://openalex.org/W2109886035","https://openalex.org/W2183341477","https://openalex.org/W2526425061","https://openalex.org/W2551887912","https://openalex.org/W2608712415","https://openalex.org/W2746192915","https://openalex.org/W2787663903","https://openalex.org/W2799923439","https://openalex.org/W2892009249","https://openalex.org/W2950872548","https://openalex.org/W2962760690","https://openalex.org/W2962780374","https://openalex.org/W2963242190","https://openalex.org/W2963414781","https://openalex.org/W2963719423","https://openalex.org/W2972818416","https://openalex.org/W2974231335","https://openalex.org/W2976556660","https://openalex.org/W3005302685","https://openalex.org/W3007328579","https://openalex.org/W3008174054","https://openalex.org/W3015190365","https://openalex.org/W3015315932","https://openalex.org/W3015537910","https://openalex.org/W3015966793","https://openalex.org/W3016010032","https://openalex.org/W3094800360","https://openalex.org/W3095697114","https://openalex.org/W3095783102","https://openalex.org/W3097777922","https://openalex.org/W3097882114","https://openalex.org/W3105532142","https://openalex.org/W3144557079","https://openalex.org/W3149509723","https://openalex.org/W3160987270","https://openalex.org/W3162899666","https://openalex.org/W3163793923","https://openalex.org/W3197507772","https://openalex.org/W4296998151","https://openalex.org/W6629717138","https://openalex.org/W6638749077","https://openalex.org/W6676562027","https://openalex.org/W6686164453","https://openalex.org/W6736086397","https://openalex.org/W6737292097","https://openalex.org/W6747398299","https://openalex.org/W6767671539","https://openalex.org/W6768361912","https://openalex.org/W6770425081","https://openalex.org/W6773880771","https://openalex.org/W6775722199","https://openalex.org/W6784599769","https://openalex.org/W6785031418","https://openalex.org/W6785229415","https://openalex.org/W6929543497","https://openalex.org/W7027429494"],"related_works":["https://openalex.org/W19473193","https://openalex.org/W2349990005","https://openalex.org/W2369153420","https://openalex.org/W2388815250","https://openalex.org/W2394094611","https://openalex.org/W15182647","https://openalex.org/W2366429403","https://openalex.org/W150594749","https://openalex.org/W2158828542","https://openalex.org/W1738715547"],"abstract_inverted_index":{"In":[0],"this":[1],"study,":[2],"we":[3,46,68,104],"present":[4],"recent":[5],"developments":[6],"of":[7,20,110],"models":[8,72],"trained":[9],"with":[10,30],"the":[11,18,91,108,111,115,123],"RNN-T":[12],"loss":[13],"in":[14,80,82,130],"ESPnet.":[15],"It":[16],"involves":[17],"use":[19],"various":[21],"archi-tectures":[22],"such":[23,62],"as":[24,63],"recently":[25],"proposed":[26,50],"Conformer,":[27],"multi-task":[28],"learning":[29],"different":[31],"auxiliary":[32],"criteria":[33],"and":[34,44,65,87,118,132],"multiple":[35],"decoding":[36,88],"strategies,":[37],"in-cluding":[38],"our":[39,49,128],"own":[40],"proposition.":[41],"Through":[42],"experiments":[43],"benchmarks,":[45],"show":[47],"that":[48,70],"systems":[51,58,79,96,129],"can":[52],"be":[53],"competitive":[54],"against":[55,75],"other":[56,76],"state-of-art":[57],"on":[59],"well-known":[60],"datasets":[61],"LibriSpeech":[64],"AISHELL-1.":[66],"Additionally,":[67],"demonstrate":[69],"these":[71,102],"are":[73],"promising":[74],"already":[77],"implemented":[78],"ESPnet":[81,112],"regards":[83],"to":[84,93,106,126],"both":[85],"performance":[86],"speed,":[89],"enabling":[90],"pos-sibility":[92],"have":[94],"powerful":[95],"for":[97,114,122],"a":[98],"streaming":[99],"task.":[100],"With":[101],"additions,":[103],"hope":[105],"expand":[107],"usefulness":[109],"toolkit":[113],"research":[116],"community":[117],"also":[119],"give":[120],"tools":[121],"ASR":[124],"industry":[125],"deploy":[127],"realistic":[131],"production":[133],"environments.":[134]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":5}],"updated_date":"2026-04-04T06:10:10.580331","created_date":"2025-10-10T00:00:00"}
