{"id":"https://openalex.org/W4392909068","doi":"https://doi.org/10.1109/icassp48485.2024.10447929","title":"Exploring Speech Recognition, Translation, and Understanding with Discrete Speech Units: A Comparative Study","display_name":"Exploring Speech Recognition, Translation, and Understanding with Discrete Speech Units: A Comparative Study","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392909068","doi":"https://doi.org/10.1109/icassp48485.2024.10447929"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447929","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447929","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050058892","display_name":"Xuankai Chang","orcid":"https://orcid.org/0000-0002-5221-5412"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xuankai Chang","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021201726","display_name":"Brian Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Yan","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023799668","display_name":"Kwanghee Choi","orcid":"https://orcid.org/0000-0001-5254-1093"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kwanghee Choi","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091834525","display_name":"Jee-weon Jung","orcid":"https://orcid.org/0000-0003-0505-2988"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jee-Weon Jung","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102942362","display_name":"Yichen Lu","orcid":"https://orcid.org/0000-0003-0296-3540"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yichen Lu","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010858961","display_name":"Soumi Maiti","orcid":"https://orcid.org/0000-0001-6940-0115"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Soumi Maiti","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058649002","display_name":"Roshan Sharma","orcid":"https://orcid.org/0000-0003-3760-9955"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Roshan Sharma","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008336983","display_name":"Jiatong Shi","orcid":"https://orcid.org/0000-0002-9050-8304"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiatong Shi","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068192693","display_name":"Jinchuan Tian","orcid":"https://orcid.org/0000-0002-2129-471X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinchuan Tian","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040732498","display_name":"Yuya Fujita","orcid":"https://orcid.org/0000-0001-8155-6040"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuya Fujita","raw_affiliation_strings":["Yahoo Japan Corporation"],"affiliations":[{"raw_affiliation_string":"Yahoo Japan Corporation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059612504","display_name":"Takashi Maekaku","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Takashi Maekaku","raw_affiliation_strings":["Yahoo Japan Corporation"],"affiliations":[{"raw_affiliation_string":"Yahoo Japan Corporation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101783173","display_name":"Pengcheng Guo","orcid":"https://orcid.org/0009-0001-2388-5935"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pengcheng Guo","raw_affiliation_strings":["Northwestern Polytechnical University"],"affiliations":[{"raw_affiliation_string":"Northwestern Polytechnical University","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002404521","display_name":"Yao-Fei Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yao-Fei Cheng","raw_affiliation_strings":["University of Washington"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081451455","display_name":"Pavel Denisov","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Pavel Denisov","raw_affiliation_strings":["University of Stuttgart"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013385059","display_name":"Kohei Saijo","orcid":null},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kohei Saijo","raw_affiliation_strings":["Waseda University"],"affiliations":[{"raw_affiliation_string":"Waseda University","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035483337","display_name":"Hsiu-Hsuan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hsiu-Hsuan Wang","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":5,"institutions_distinct_count":17,"corresponding_author_ids":["https://openalex.org/A5050058892"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":9.948,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.98439494,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"11481","last_page":"11485"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7962751388549805},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.7771426439285278},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.743194580078125},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7209509611129761},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.5932611227035522},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5212891101837158},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5069537162780762},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4402114450931549},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.41948699951171875},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4116699695587158},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.2908138632774353}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7962751388549805},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.7771426439285278},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.743194580078125},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7209509611129761},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.5932611227035522},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5212891101837158},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5069537162780762},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4402114450931549},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.41948699951171875},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4116699695587158},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.2908138632774353},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447929","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447929","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1353381076","display_name":null,"funder_award_id":"2138296","funder_id":"https://openalex.org/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https://openalex.org/G1912064254","display_name":null,"funder_award_id":"2137603","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2507572247","display_name":null,"funder_award_id":"2138286","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https://openalex.org/G4006316252","display_name":null,"funder_award_id":"2138259","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4010157413","display_name":null,"funder_award_id":"2138259","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https://openalex.org/G4968279142","display_name":null,"funder_award_id":"2138307","funder_id":"https://openalex.org/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https://openalex.org/G509285653","display_name":null,"funder_award_id":"2138307","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5173294414","display_name":null,"funder_award_id":"2138286","funder_id":"https://openalex.org/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https://openalex.org/G5464919783","display_name":null,"funder_award_id":"2137603","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https://openalex.org/G6496325548","display_name":null,"funder_award_id":"A6000","funder_id":"https://openalex.org/F4320309480","funder_display_name":"Nvidia"},{"id":"https://openalex.org/G6664589177","display_name":null,"funder_award_id":"2138307","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https://openalex.org/G8165146668","display_name":null,"funder_award_id":"2138259","funder_id":"https://openalex.org/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https://openalex.org/G8300691027","display_name":null,"funder_award_id":"2138286","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8819975937","display_name":null,"funder_award_id":"2137603","funder_id":"https://openalex.org/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https://openalex.org/G8897435675","display_name":null,"funder_award_id":"2138296","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320312143","display_name":"National Centre for Supercomputing Applications","ror":"https://ror.org/03r10zj06"},{"id":"https://openalex.org/F4320337377","display_name":"Office of Advanced Cyberinfrastructure","ror":"https://ror.org/04nh1dc89"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W2127141656","https://openalex.org/W2144499799","https://openalex.org/W2160815625","https://openalex.org/W2166637769","https://openalex.org/W2398826216","https://openalex.org/W2515753980","https://openalex.org/W2752796333","https://openalex.org/W2799473636","https://openalex.org/W2899274165","https://openalex.org/W2962780374","https://openalex.org/W2963242190","https://openalex.org/W2963799213","https://openalex.org/W2973049979","https://openalex.org/W3015356564","https://openalex.org/W3024605872","https://openalex.org/W3036601975","https://openalex.org/W3097777922","https://openalex.org/W3100460087","https://openalex.org/W3101648800","https://openalex.org/W3163793923","https://openalex.org/W3198587774","https://openalex.org/W3198694222","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3215615641","https://openalex.org/W4200635400","https://openalex.org/W4307323391","https://openalex.org/W4313679638","https://openalex.org/W4319862255","https://openalex.org/W4375869259","https://openalex.org/W4381786045","https://openalex.org/W4381827575","https://openalex.org/W4385245566","https://openalex.org/W4385565440","https://openalex.org/W4385570101","https://openalex.org/W4385822439","https://openalex.org/W4385822683","https://openalex.org/W4385970143","https://openalex.org/W4386566728","https://openalex.org/W4391021675","https://openalex.org/W6623517193","https://openalex.org/W6755559483","https://openalex.org/W6769196770","https://openalex.org/W6771467084","https://openalex.org/W6780218876","https://openalex.org/W6847363464","https://openalex.org/W6898634591"],"related_works":["https://openalex.org/W2338806053","https://openalex.org/W4385571610","https://openalex.org/W39235475","https://openalex.org/W2164147372","https://openalex.org/W2550171623","https://openalex.org/W4253660971","https://openalex.org/W4360995948","https://openalex.org/W1909292483","https://openalex.org/W1428730622","https://openalex.org/W1658560081"],"abstract_inverted_index":{"Speech":[0],"signals,":[1],"typically":[2],"sampled":[3],"at":[4],"rates":[5],"in":[6,17,134,146],"the":[7,30,33,45,59,76,101,137],"tens":[8],"of":[9,47,61,103],"thousands":[10],"per":[11],"second,":[12],"contain":[13],"redundancies,":[14],"evoking":[15],"inefficiencies":[16],"sequence":[18,78],"modeling.":[19],"High-dimensional":[20],"speech":[21,49,62,77,108,115,118],"features":[22],"such":[23,67],"as":[24,29,68],"spectrograms":[25],"are":[26,144],"often":[27],"used":[28],"input":[31],"for":[32],"subsequent":[34],"model.":[35],"However,":[36],"they":[37],"can":[38,73],"still":[39],"be":[40],"redundant.":[41],"Recent":[42],"investigations":[43],"proposed":[44],"use":[46],"discrete":[48,104,128],"units":[50,105,129],"derived":[51],"from":[52],"self-supervised":[53],"learning":[54],"representations,":[55],"which":[56],"significantly":[57,84],"compresses":[58],"size":[60],"data.":[63],"Applying":[64],"various":[65],"methods,":[66],"de-duplication":[69],"and":[70,97,120,141],"subword":[71],"modeling,":[72],"further":[74],"compress":[75],"length.":[79],"Hence,":[80],"training":[81],"time":[82],"is":[83],"reduced":[85],"while":[86],"retaining":[87],"notable":[88],"performance.":[89],"In":[90],"this":[91],"study,":[92],"we":[93],"undertake":[94],"a":[95],"comprehensive":[96],"systematic":[98],"exploration":[99],"into":[100],"application":[102],"within":[106],"end-to-end":[107],"processing":[109],"models.":[110],"Experiments":[111],"on":[112],"12":[113],"automatic":[114],"recognition,":[116],"3":[117],"translation,":[119],"1":[121],"spoken":[122],"language":[123],"understanding":[124],"corpora":[125],"demonstrate":[126],"that":[127],"achieve":[130],"reasonably":[131],"good":[132],"results":[133],"almost":[135],"all":[136],"settings.":[138],"Our":[139],"configurations":[140],"trained":[142],"models":[143],"released":[145],"ESPnet":[147],"to":[148],"foster":[149],"future":[150],"research":[151],"efforts.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":9}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
