{"id":"https://openalex.org/W2972818416","doi":"https://doi.org/10.1109/asru46091.2019.9003750","title":"A Comparative Study on Transformer vs RNN in Speech Applications","display_name":"A Comparative Study on Transformer vs RNN in Speech Applications","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W2972818416","doi":"https://doi.org/10.1109/asru46091.2019.9003750","mag":"2972818416"},"language":"en","primary_location":{"id":"doi:10.1109/asru46091.2019.9003750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9003750","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.06317","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036532214","display_name":"Shigeki Karita","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shigeki Karita","raw_affiliation_strings":["NTT Communication Science Laboratories"],"affiliations":[{"raw_affiliation_string":"NTT Communication Science Laboratories","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071702566","display_name":"Nanxin Chen","orcid":"https://orcid.org/0000-0001-6698-1604"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nanxin Chen","raw_affiliation_strings":["Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078778981","display_name":"Tomoki Hayashi","orcid":"https://orcid.org/0000-0001-8782-4093"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tomoki Hayashi","raw_affiliation_strings":["Human Dataware Lab. Co., Ltd"],"affiliations":[{"raw_affiliation_string":"Human Dataware Lab. Co., Ltd","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087554069","display_name":"Takaaki Hori","orcid":"https://orcid.org/0000-0003-4560-8039"},"institutions":[{"id":"https://openalex.org/I4210159266","display_name":"Mitsubishi Electric (United States)","ror":"https://ror.org/053jnhe44","country_code":"US","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125","https://openalex.org/I4210159266"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Takaaki Hori","raw_affiliation_strings":["Mitsubishi Electric Research Laboratories"],"affiliations":[{"raw_affiliation_string":"Mitsubishi Electric Research Laboratories","institution_ids":["https://openalex.org/I4210159266"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040282669","display_name":"Hirofumi Inaguma","orcid":"https://orcid.org/0000-0003-0610-1251"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hirofumi Inaguma","raw_affiliation_strings":["Kyoto University"],"affiliations":[{"raw_affiliation_string":"Kyoto University","institution_ids":["https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066144392","display_name":"Ziyan Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziyan Jiang","raw_affiliation_strings":["Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030925462","display_name":"Masao Someki","orcid":null},"institutions":[{"id":"https://openalex.org/I60134161","display_name":"Nagoya University","ror":"https://ror.org/04chrp450","country_code":"JP","type":"education","lineage":["https://openalex.org/I60134161"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Masao Someki","raw_affiliation_strings":["Nagoya University"],"affiliations":[{"raw_affiliation_string":"Nagoya University","institution_ids":["https://openalex.org/I60134161"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004736242","display_name":"Nelson Enrique Yalta Soplin","orcid":"https://orcid.org/0000-0001-7907-0076"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nelson Enrique Yalta Soplin","raw_affiliation_strings":["Waseda University"],"affiliations":[{"raw_affiliation_string":"Waseda University","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100669485","display_name":"Ryuichi Yamamoto","orcid":"https://orcid.org/0000-0003-0299-5470"},"institutions":[{"id":"https://openalex.org/I4210096607","display_name":"Line Corporation (Japan)","ror":"https://ror.org/00qg8pm87","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210096607","https://openalex.org/I60922564"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Ryuichi Yamamoto","raw_affiliation_strings":["LINE Corporation"],"affiliations":[{"raw_affiliation_string":"LINE Corporation","institution_ids":["https://openalex.org/I4210096607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100363489","display_name":"Xiaofei Wang","orcid":"https://orcid.org/0009-0004-6683-3969"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaofei Wang","raw_affiliation_strings":["Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101948311","display_name":"Takenori Yoshimura","orcid":"https://orcid.org/0000-0003-3964-5677"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takenori Yoshimura","raw_affiliation_strings":["Human Dataware Lab. Co., Ltd"],"affiliations":[{"raw_affiliation_string":"Human Dataware Lab. Co., Ltd","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071937621","display_name":"Wangyou Zhang","orcid":"https://orcid.org/0000-0003-4500-3515"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wangyou Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5036532214"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":72.5045,"has_fulltext":false,"cited_by_count":773,"citation_normalized_percentile":{"value":0.99929395,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"449","last_page":"456"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.828636884689331},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7661212682723999},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.7158658504486084},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.6649037599563599},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6456910371780396},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47855645418167114},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4379500448703766},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4346567392349243},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.41729235649108887},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13018199801445007},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.10307016968727112},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.06359374523162842}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.828636884689331},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7661212682723999},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.7158658504486084},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.6649037599563599},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6456910371780396},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47855645418167114},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4379500448703766},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4346567392349243},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.41729235649108887},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13018199801445007},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.10307016968727112},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.06359374523162842}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/asru46091.2019.9003750","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9003750","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.06317","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.06317","pdf_url":"https://arxiv.org/pdf/1909.06317","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.06317","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.06317","pdf_url":"https://arxiv.org/pdf/1909.06317","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W37526647","https://openalex.org/W179875071","https://openalex.org/W1494198834","https://openalex.org/W1524333225","https://openalex.org/W1526236009","https://openalex.org/W1902237438","https://openalex.org/W2024490156","https://openalex.org/W2085628288","https://openalex.org/W2101045344","https://openalex.org/W2127141656","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2166637769","https://openalex.org/W2250357346","https://openalex.org/W2327501763","https://openalex.org/W2404126548","https://openalex.org/W2407080277","https://openalex.org/W2559809918","https://openalex.org/W2605131327","https://openalex.org/W2627092829","https://openalex.org/W2765486990","https://openalex.org/W2767017151","https://openalex.org/W2767052532","https://openalex.org/W2786835190","https://openalex.org/W2796108585","https://openalex.org/W2799473636","https://openalex.org/W2799800213","https://openalex.org/W2809456172","https://openalex.org/W2884797218","https://openalex.org/W2886180730","https://openalex.org/W2892009249","https://openalex.org/W2901607128","https://openalex.org/W2903739847","https://openalex.org/W2936774411","https://openalex.org/W2943845043","https://openalex.org/W2944255943","https://openalex.org/W2946200149","https://openalex.org/W2951418500","https://openalex.org/W2962778134","https://openalex.org/W2962780374","https://openalex.org/W2963242190","https://openalex.org/W2963250244","https://openalex.org/W2963403868","https://openalex.org/W2963418779","https://openalex.org/W2963499433","https://openalex.org/W2963609956","https://openalex.org/W2963807318","https://openalex.org/W2964243274","https://openalex.org/W2964308564","https://openalex.org/W2970730223","https://openalex.org/W2972389417","https://openalex.org/W3012492057","https://openalex.org/W3101648800","https://openalex.org/W3103005696","https://openalex.org/W3104636952","https://openalex.org/W4297747548","https://openalex.org/W4385245566","https://openalex.org/W6600284362","https://openalex.org/W6601563604","https://openalex.org/W6629717138","https://openalex.org/W6631362777","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6691509046","https://openalex.org/W6713762819","https://openalex.org/W6736996214","https://openalex.org/W6739366949","https://openalex.org/W6739901393","https://openalex.org/W6745289305","https://openalex.org/W6749669830","https://openalex.org/W6752527106","https://openalex.org/W6757079273","https://openalex.org/W6763832098","https://openalex.org/W6775053297"],"related_works":["https://openalex.org/W3176018525","https://openalex.org/W2903533908","https://openalex.org/W2903810591","https://openalex.org/W3026554633","https://openalex.org/W2888520903","https://openalex.org/W2890256614","https://openalex.org/W3098873988","https://openalex.org/W2949454572","https://openalex.org/W3066373881","https://openalex.org/W2970247882"],"abstract_inverted_index":{"Sequence-to-sequence":[0],"models":[1],"have":[2],"been":[3],"widely":[4],"used":[5],"in":[6,36,50,64,101,105],"end-to-end":[7],"speech":[8,13,16],"processing,":[9],"for":[10,92,124,132],"example,":[11],"automatic":[12],"recognition":[14],"(ASR),":[15],"translation":[17,39],"(ST),":[18],"and":[19,40,55,58,75,85,120,129],"text-to-speech":[20],"(TTS).":[21],"This":[22],"paper":[23],"focuses":[24],"on":[25],"an":[26],"emergent":[27],"sequence-to-sequence":[28],"model":[29],"called":[30],"Transformer,":[31],"which":[32,51],"achieves":[33],"state-of-the-art":[34],"performance":[35,87],"neural":[37,61],"machine":[38],"other":[41],"natural":[42],"language":[43],"processing":[44],"applications.":[45],"We":[46,109],"undertook":[47],"intensive":[48],"studies":[49],"we":[52],"experimentally":[53],"compared":[54],"analyzed":[56],"Transformer":[57,91,100],"conventional":[59],"recurrent":[60],"networks":[62],"(RNN)":[63],"a":[65],"total":[66],"of":[67,99],"15":[68],"ASR,":[69,72,127],"one":[70,73],"multilingual":[71],"ST,":[74,128],"two":[76],"TTS":[77,130],"benchmarks.":[78],"Our":[79],"experiments":[80],"revealed":[81],"various":[82],"training":[83],"tips":[84],"significant":[86],"benefits":[88],"obtained":[89],"with":[90,107],"each":[93],"task":[94],"including":[95],"the":[96,126,133],"surprising":[97],"superiority":[98],"13/15":[102],"ASR":[103],"benchmarks":[104],"comparison":[106],"RNN.":[108],"are":[110],"preparing":[111],"to":[112,135],"release":[113],"Kaldi-style":[114],"reproducible":[115],"recipes":[116],"using":[117],"open":[118],"source":[119],"publicly":[121],"available":[122],"datasets":[123],"all":[125],"tasks":[131],"community":[134],"succeed":[136],"our":[137],"exciting":[138],"outcomes.":[139]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":62},{"year":2024,"cited_by_count":80},{"year":2023,"cited_by_count":127},{"year":2022,"cited_by_count":93},{"year":2021,"cited_by_count":226},{"year":2020,"cited_by_count":162},{"year":2019,"cited_by_count":17},{"year":2013,"cited_by_count":1}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
