{"id":"https://openalex.org/W4408861574","doi":"https://doi.org/10.1109/icce63647.2025.10929894","title":"Extending Whisper for Korean-English Code-switching Speech Recognition","display_name":"Extending Whisper for Korean-English Code-switching Speech Recognition","publication_year":2025,"publication_date":"2025-01-11","ids":{"openalex":"https://openalex.org/W4408861574","doi":"https://doi.org/10.1109/icce63647.2025.10929894"},"language":"en","primary_location":{"id":"doi:10.1109/icce63647.2025.10929894","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icce63647.2025.10929894","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Consumer Electronics (ICCE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109139408","display_name":"H. S. Seong","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hyeonggi Seong","raw_affiliation_strings":["Seoul National University"],"affiliations":[{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000717294","display_name":"Nam-Joon Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Nam-Joon Kim","raw_affiliation_strings":["Seoul National University"],"affiliations":[{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023354297","display_name":"Hyun Gon Ryu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hyun Gon Ryu","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102861073","display_name":"Hyuk-Jae Lee","orcid":"https://orcid.org/0000-0001-8895-9117"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyuk-Jae Lee","raw_affiliation_strings":["Seoul National University"],"affiliations":[{"raw_affiliation_string":"Seoul National University","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5109139408"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02454729,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9128999710083008,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.7832561135292053},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7485347390174866},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5752394199371338},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3612736463546753},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.33408835530281067},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.25730374455451965}],"concepts":[{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.7832561135292053},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7485347390174866},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5752394199371338},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3612736463546753},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.33408835530281067},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.25730374455451965},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icce63647.2025.10929894","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icce63647.2025.10929894","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Consumer Electronics (ICCE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W3015445672","https://openalex.org/W3091427154","https://openalex.org/W3096122506","https://openalex.org/W4297150273","https://openalex.org/W4297841499","https://openalex.org/W4385822875","https://openalex.org/W4385822890","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2771594921","https://openalex.org/W2432897346","https://openalex.org/W2181336723","https://openalex.org/W4389976243","https://openalex.org/W3158134258","https://openalex.org/W3138119129","https://openalex.org/W2940588741"],"abstract_inverted_index":{"Among":[0],"various":[1,17],"automatic":[2],"speech":[3,27,96],"recognition":[4],"(ASR)":[5],"models,":[6],"Whisper":[7,21,47,83],"is":[8],"a":[9,34,50,55,92,99,109,121],"state-of-the-art":[10],"model":[11,53],"that":[12,48],"demonstrates":[13],"robust":[14],"performance":[15,24],"across":[16],"multilingual":[18],"tasks.":[19],"However,":[20],"shows":[22],"poor":[23],"in":[25,102,112],"code-switching":[26,87,95],"where":[28],"multiple":[29,66],"languages":[30],"are":[31],"used":[32],"within":[33],"single":[35],"sentence.":[36],"To":[37],"address":[38],"this":[39],"limitation,":[40],"we":[41],"propose":[42],"an":[43],"extended":[44],"version":[45],"of":[46],"utilizes":[49],"language":[51,67,79],"detector":[52],"with":[54,119],"modified":[56],"token":[57,64],"sequence":[58],"format.":[59],"Unlike":[60],"the":[61,78,103,113,130],"original":[62,131],"Whisper's":[63],"sequence,":[65],"tokens":[68],"can":[69],"be":[70],"assigned":[71],"to":[72,84,129],"one":[73],"sentence":[74],"using":[75],"information":[76],"from":[77],"detector,":[80],"which":[81],"enables":[82],"effectively":[85],"process":[86],"speech.":[88],"Our":[89],"experiments":[90],"on":[91],"one-hour":[93],"Korean-English":[94],"dataset":[97],"showed":[98],"1.7%":[100],"reduction":[101,111],"character":[104],"error":[105,116],"rate":[106,117],"(CER)":[107],"and":[108],"0.9%":[110],"space-normalized":[114],"word":[115],"(sWER)":[118],"only":[120],"1.12":[122],"\u00d7":[123],"higher":[124],"relative":[125],"time":[126],"complexity":[127],"compared":[128],"Whisper.":[132]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
