{"id":"https://openalex.org/W3171277129","doi":"https://doi.org/10.21437/interspeech.2021-775","title":"A Comparative Study on Neural Architectures and Training Methods for Japanese Speech Recognition","display_name":"A Comparative Study on Neural Architectures and Training Methods for Japanese Speech Recognition","publication_year":2021,"publication_date":"2021-08-27","ids":{"openalex":"https://openalex.org/W3171277129","doi":"https://doi.org/10.21437/interspeech.2021-775","mag":"3171277129"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2021-775","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-775","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2106.05111","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036532214","display_name":"Shigeki Karita","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shigeki Karita","raw_affiliation_strings":["Google,,,,,"],"affiliations":[{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102252420","display_name":"Yotaro Kubo","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yotaro Kubo","raw_affiliation_strings":["Google,,,,,"],"affiliations":[{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049614700","display_name":"Michiel Bacchiani","orcid":"https://orcid.org/0000-0003-4527-0197"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michiel Adriaan Unico Bacchiani","raw_affiliation_strings":["Google,,,,,"],"affiliations":[{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023448834","display_name":"Llion Jones","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Llion Jones","raw_affiliation_strings":["Google,,,,,"],"affiliations":[{"raw_affiliation_string":"Google,,,,,","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036532214"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07636466,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2092","last_page":"2096"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7507041692733765},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.680038332939148},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6741252541542053},{"id":"https://openalex.org/keywords/connectionism","display_name":"Connectionism","score":0.6650588512420654},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.5579290390014648},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5166914463043213},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4672292172908783},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45898163318634033},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.44873741269111633},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4126480221748352},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3428284823894501},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1365969181060791},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09235075116157532}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7507041692733765},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.680038332939148},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6741252541542053},{"id":"https://openalex.org/C8521452","wikidata":"https://www.wikidata.org/wiki/Q203790","display_name":"Connectionism","level":3,"score":0.6650588512420654},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.5579290390014648},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5166914463043213},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4672292172908783},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45898163318634033},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.44873741269111633},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4126480221748352},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3428284823894501},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1365969181060791},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09235075116157532},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.21437/interspeech.2021-775","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2021-775","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2106.05111","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.05111","pdf_url":"https://arxiv.org/pdf/2106.05111","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3171277129","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2106.05111","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2106.05111","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2106.05111","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2106.05111","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2106.05111","pdf_url":"https://arxiv.org/pdf/2106.05111","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W3171277129.pdf"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1586532344","https://openalex.org/W1828163288","https://openalex.org/W1964175594","https://openalex.org/W2064675550","https://openalex.org/W2127141656","https://openalex.org/W2342173569","https://openalex.org/W2567070169","https://openalex.org/W2577366047","https://openalex.org/W2627092829","https://openalex.org/W2928941594","https://openalex.org/W2936774411","https://openalex.org/W2962780374","https://openalex.org/W2962972936","https://openalex.org/W2963403868","https://openalex.org/W2964110616","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W2972780808","https://openalex.org/W2972818416","https://openalex.org/W2974008169","https://openalex.org/W2995181338","https://openalex.org/W3093579165","https://openalex.org/W3094800360","https://openalex.org/W3095311338","https://openalex.org/W3096318498","https://openalex.org/W3097777922"],"related_works":["https://openalex.org/W2785450052","https://openalex.org/W2890197052","https://openalex.org/W2128367848","https://openalex.org/W3163339532","https://openalex.org/W2992448548","https://openalex.org/W3155343771","https://openalex.org/W1974455634","https://openalex.org/W3111180042","https://openalex.org/W3025521026","https://openalex.org/W590458053","https://openalex.org/W2775304348","https://openalex.org/W3202419788","https://openalex.org/W3155168732","https://openalex.org/W3160551958","https://openalex.org/W2809840276","https://openalex.org/W2520176975","https://openalex.org/W2777114772","https://openalex.org/W2403046421","https://openalex.org/W2936399153","https://openalex.org/W2154104743"],"abstract_inverted_index":{"End-to-end":[0],"(E2E)":[1],"modeling":[2,23,38],"is":[3,18,24,139],"advantageous":[4],"for":[5,11,125],"automatic":[6],"speech":[7],"recognition":[8],"(ASR)":[9],"especially":[10],"Japanese":[12,17,46,129],"since":[13],"word-based":[14],"tokenization":[15],"of":[16,65,90,120,127,150],"not":[19],"trivial,":[20],"and":[21,40,56,70,80,103,123,133],"E2E":[22,37],"able":[25],"to":[26,60,142,147],"model":[27],"character":[28,117],"sequences":[29],"directly.":[30],"This":[31],"paper":[32,86,113],"focuses":[33],"on":[34,44,88],"the":[35,62,85,91,112,115,148],"latest":[36],"techniques,":[39],"investigates":[41,87],"their":[42],"performances":[43],"character-based":[45],"ASR":[47],"by":[48],"conducting":[49],"comparative":[50],"experiments.":[51],"The":[52,107,137],"results":[53],"are":[54],"analyzed":[55],"discussed":[57],"in":[58,73,111],"order":[59],"understand":[61],"relative":[63],"advantages":[64],"long":[66],"short-term":[67],"memory":[68],"(LSTM),":[69],"Conformer":[71,151],"models":[72],"combination":[74],"with":[75],"connectionist":[76],"temporal":[77],"classification,":[78],"transducer,":[79],"attention-based":[81],"loss":[82],"functions.":[83],"Furthermore,":[84],"effectivity":[89],"recent":[92],"training":[93],"techniques":[94],"such":[95],"as":[96],"data":[97],"augmentation":[98],"(SpecAugment),":[99],"variational":[100],"noise":[101],"injection,":[102],"exponential":[104],"moving":[105],"average.":[106],"best":[108],"configuration":[109],"found":[110],"achieved":[114],"state-of-the-art":[116],"error":[118],"rates":[119],"4.1%,":[121],"3.2%,":[122],"3.5%":[124],"Corpus":[126],"Spontaneous":[128],"(CSJ)":[130],"eval1,":[131],"eval2,":[132],"eval3":[134],"tasks,":[135],"respectively.":[136],"system":[138],"also":[140],"shown":[141],"be":[143],"computationally":[144],"efficient":[145],"thanks":[146],"efficiency":[149],"transducers.":[152]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
