{"id":"https://openalex.org/W4372349162","doi":"https://doi.org/10.1109/icassp49357.2023.10096189","title":"T5lephone: Bridging Speech and Text Self-Supervised Models for Spoken Language Understanding Via Phoneme Level T5","display_name":"T5lephone: Bridging Speech and Text Self-Supervised Models for Spoken Language Understanding Via Phoneme Level T5","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372349162","doi":"https://doi.org/10.1109/icassp49357.2023.10096189"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096189","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10096189","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059902893","display_name":"Chan-Jan Hsu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chan-Jan Hsu","raw_affiliation_strings":["MediaTek Research"],"affiliations":[{"raw_affiliation_string":"MediaTek Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111114748","display_name":"Ho-Lam Chung","orcid":"https://orcid.org/0000-0003-3847-0166"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Ho-Lam Chung","raw_affiliation_strings":["National Taiwan University,Taiwan","National Taiwan University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University,Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040508737","display_name":"Hung-yi Lee","orcid":"https://orcid.org/0000-0002-9654-5747"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Yi Lee","raw_affiliation_strings":["National Taiwan University,Taiwan","National Taiwan University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University,Taiwan","institution_ids":["https://openalex.org/I16733864"]},{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044008055","display_name":"Yu Tsao","orcid":"https://orcid.org/0000-0001-6956-0418"},"institutions":[{"id":"https://openalex.org/I84653119","display_name":"Academia Sinica","ror":"https://ror.org/05bxb3784","country_code":"TW","type":"facility","lineage":["https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu Tsao","raw_affiliation_strings":["Academia Sinica,Taiwan","Academia Sinica, Taiwan"],"affiliations":[{"raw_affiliation_string":"Academia Sinica,Taiwan","institution_ids":["https://openalex.org/I84653119"]},{"raw_affiliation_string":"Academia Sinica, Taiwan","institution_ids":["https://openalex.org/I84653119"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5059902893"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5882,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69714573,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"34","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8236472606658936},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.7884407043457031},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7294930219650269},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.591549277305603},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5869247317314148},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.5154593586921692},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4445129632949829},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.4119187593460083}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8236472606658936},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.7884407043457031},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7294930219650269},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.591549277305603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5869247317314148},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.5154593586921692},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4445129632949829},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.4119187593460083}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096189","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10096189","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2557764419","https://openalex.org/W2896457183","https://openalex.org/W2951831170","https://openalex.org/W2963748441","https://openalex.org/W2981458636","https://openalex.org/W3001434439","https://openalex.org/W3015468748","https://openalex.org/W3034999214","https://openalex.org/W3122890974","https://openalex.org/W3148001440","https://openalex.org/W3162037819","https://openalex.org/W3164045210","https://openalex.org/W3173767661","https://openalex.org/W3176711365","https://openalex.org/W3196509775","https://openalex.org/W3197580070","https://openalex.org/W3200129129","https://openalex.org/W3207654254","https://openalex.org/W4226103796","https://openalex.org/W4285217170","https://openalex.org/W4285250921","https://openalex.org/W4287173589","https://openalex.org/W4288089799","https://openalex.org/W4296068815","https://openalex.org/W6769627184","https://openalex.org/W6795952400"],"related_works":["https://openalex.org/W4386014872","https://openalex.org/W1847536016","https://openalex.org/W4361193986","https://openalex.org/W4378498597","https://openalex.org/W3101140821","https://openalex.org/W4287816966","https://openalex.org/W3015650676","https://openalex.org/W4389520445","https://openalex.org/W4387800341","https://openalex.org/W4388347706"],"abstract_inverted_index":{"In":[0,56],"Spoken":[1],"language":[2,17,27,46,72],"understanding":[3,73],"(SLU),":[4],"a":[5,93],"natural":[6],"solution":[7],"is":[8,54,98,140],"concatenating":[9],"pre-trained":[10,26],"speech":[11,42,81],"models":[12,18,28],"(e.g.":[13],"HuBERT)":[14],"and":[15,45,49,80,122,136],"pretrained":[16,99],"(PLM,":[19],"e.g.":[20],"T5).":[21],"Most":[22],"previous":[23],"works":[24],"use":[25],"with":[29,51,66,106,128],"subword-based":[30],"tokenization.":[31],"However,":[32],"the":[33,39,86,123],"granularity":[34],"of":[35,41,95,131],"input":[36],"units":[37,132],"affects":[38],"alignment":[40],"model":[43,47,125],"outputs":[44],"inputs,":[48],"PLM":[50],"character-based":[52],"tokenization":[53,68],"underexplored.":[55],"this":[57],"work,":[58],"we":[59],"conduct":[60],"extensive":[61],"studies":[62],"on":[63,120,133],"how":[64],"PLMs":[65,108],"different":[67],"strategies":[69],"affect":[70],"spoken":[71,76],"task":[74],"including":[75],"question":[77],"answering":[78],"(SQA)":[79],"translation":[82],"(ST).We":[83],"further":[84],"extend":[85],"idea":[87],"to":[88,109],"create":[89],"T5lephone<sup":[90],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[91,143],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>,":[92],"variant":[94],"T5":[96,127],"that":[97],"using":[100,112],"phonemicized":[101],"text.":[102],"We":[103,117],"initialize":[104],"T5lephone":[105,124],"existing":[107],"pretrain":[110],"it":[111],"relatively":[113],"lightweight":[114],"computational":[115],"resources.":[116],"reached":[118],"state-of-the-art":[119],"NMSQA,":[121],"exceeds":[126],"other":[129],"types":[130],"end-to-end":[134],"SQA":[135],"ST.":[137],"Our":[138],"code":[139],"publicly":[141],"available.<sup":[142],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">2</sup>":[144]},"counts_by_year":[{"year":2023,"cited_by_count":3}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
