{"id":"https://openalex.org/W4408355506","doi":"https://doi.org/10.1109/icassp49660.2025.10889634","title":"Adapting Whisper for Code-Switching through Encoding Refining and Language-Aware Decoding","display_name":"Adapting Whisper for Code-Switching through Encoding Refining and Language-Aware Decoding","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355506","doi":"https://doi.org/10.1109/icassp49660.2025.10889634"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889634","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889634","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101298861","display_name":"Jiahui Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahui Zhao","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112457513","display_name":"Hao Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]},{"id":"https://openalex.org/I39012071","display_name":"Kyoto College of Graduate Studies for Informatics","ror":"https://ror.org/05mzj8a56","country_code":"JP","type":"education","lineage":["https://openalex.org/I39012071"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Hao Shi","raw_affiliation_strings":["Kyoto University,Graduate School of Informatics,Kyoto,Japan"],"affiliations":[{"raw_affiliation_string":"Kyoto University,Graduate School of Informatics,Kyoto,Japan","institution_ids":["https://openalex.org/I39012071","https://openalex.org/I22299242"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115637581","display_name":"Chenrui Cui","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenrui Cui","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080726804","display_name":"Tian\u2010Rui Wang","orcid":"https://orcid.org/0000-0003-1068-0321"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianrui Wang","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091096053","display_name":"Hexin Liu","orcid":"https://orcid.org/0000-0002-3998-9229"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Hexin Liu","raw_affiliation_strings":["Nanyang Technological University,College of Computing and Data Science,Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,College of Computing and Data Science,Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031088292","display_name":"Zhaoheng Ni","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhaoheng Ni","raw_affiliation_strings":["Meta,New York,USA"],"affiliations":[{"raw_affiliation_string":"Meta,New York,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083071655","display_name":"Lingxuan Ye","orcid":"https://orcid.org/0009-0000-3548-9262"},"institutions":[{"id":"https://openalex.org/I4210099069","display_name":"Institute of Acoustics","ror":"https://ror.org/00v8rqv75","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210099069"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingxuan Ye","raw_affiliation_strings":["Institute of Acoustics,Key Laboratory of Speech Acoustics and Content Understanding,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Acoustics,Key Laboratory of Speech Acoustics and Content Understanding,Beijing,China","institution_ids":["https://openalex.org/I4210099069"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101745213","display_name":"Longbiao Wang","orcid":"https://orcid.org/0000-0002-8094-6861"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longbiao Wang","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101298861"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":4.3637,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93387239,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.5461000204086304,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.5461000204086304,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.7886799573898315},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7745550870895386},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.757777214050293},{"id":"https://openalex.org/keywords/refining","display_name":"Refining (metallurgy)","score":0.6430123448371887},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.5211482048034668},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5100212693214417},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4856761693954468},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11642944812774658},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1042473316192627},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.05642881989479065}],"concepts":[{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.7886799573898315},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7745550870895386},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.757777214050293},{"id":"https://openalex.org/C60044698","wikidata":"https://www.wikidata.org/wiki/Q1283324","display_name":"Refining (metallurgy)","level":2,"score":0.6430123448371887},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.5211482048034668},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5100212693214417},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4856761693954468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11642944812774658},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1042473316192627},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.05642881989479065},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C147789679","wikidata":"https://www.wikidata.org/wiki/Q11372","display_name":"Physical chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889634","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889634","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W880632652","https://openalex.org/W3096122506","https://openalex.org/W3197580070","https://openalex.org/W3198429080","https://openalex.org/W4281770669","https://openalex.org/W4297841499","https://openalex.org/W4312096802","https://openalex.org/W4375869049","https://openalex.org/W4375869164","https://openalex.org/W4385822890","https://openalex.org/W4385822988","https://openalex.org/W4385823213","https://openalex.org/W4387448039","https://openalex.org/W4391021440","https://openalex.org/W4392902746","https://openalex.org/W4392903108","https://openalex.org/W4392903310","https://openalex.org/W4392903651","https://openalex.org/W4399168695","https://openalex.org/W4399265137","https://openalex.org/W4402111262","https://openalex.org/W4402111375","https://openalex.org/W4402111705","https://openalex.org/W4402111713","https://openalex.org/W4402112032","https://openalex.org/W4406461869","https://openalex.org/W4406858895","https://openalex.org/W4408355587","https://openalex.org/W4410087476","https://openalex.org/W6759579507","https://openalex.org/W6780218876","https://openalex.org/W6847363464","https://openalex.org/W6852909395"],"related_works":["https://openalex.org/W2368824897","https://openalex.org/W1508050556","https://openalex.org/W1910862367","https://openalex.org/W2379365082","https://openalex.org/W2370747590","https://openalex.org/W2030109976","https://openalex.org/W2369260257","https://openalex.org/W2389120450","https://openalex.org/W55249799","https://openalex.org/W1968289971"],"abstract_inverted_index":{"Code-switching":[0],"(CS)":[1],"automatic":[2],"speech":[3,46],"recognition":[4,47],"(ASR)":[5],"faces":[6],"challenges":[7],"due":[8],"to":[9,49,63,85,100,166],"the":[10,24,65,102,109,116,119,132,147,152,170],"language":[11,20,82,156],"confusion":[12],"resulting":[13],"from":[14,51],"accents,":[15],"auditory":[16],"similarity,":[17],"and":[18,54,129,134],"seamless":[19],"switches.":[21],"Adaptation":[22],"on":[23,131,154],"pre-trained":[25,45],"multi-lingual":[26],"model":[27],"has":[28],"shown":[29],"promising":[30],"performance":[31,153],"for":[32],"CS-ASR.":[33],"In":[34],"this":[35],"paper,":[36],"we":[37,58,72,144],"adapt":[38],"Whisper,":[39],"which":[40],"is":[41,98],"a":[42,95,123],"large-scale":[43],"multilingual":[44],"model,":[48,118],"CS":[50,158],"both":[52],"encoder":[53,61],"decoder":[55,92],"parts.":[56],"First,":[57],"propose":[59,73],"an":[60],"refiner":[62],"enhance":[64],"encoder\u2019s":[66],"capacity":[67],"of":[68,77,127],"intra-sentence":[69],"swithching.":[70],"Second,":[71],"using":[74,108],"two":[75,171],"sets":[76],"language-aware":[78,103],"adapters":[79],"with":[80,115],"different":[81],"prompt":[83],"embeddings":[84],"achieve":[86],"language-specific":[87],"decoding":[88],"information":[89],"in":[90,157],"each":[91],"layer.":[93],"Then,":[94],"fusion":[96],"module":[97],"added":[99],"fuse":[101],"decoding.":[104],"The":[105],"experimental":[106],"results":[107],"SEAME":[110],"dataset":[111],"show":[112],"that,":[113],"compared":[114],"baseline":[117],"proposed":[120,148],"approach":[121,163],"achieves":[122],"relative":[124],"MER":[125],"reduction":[126],"4.1%":[128],"7.2%":[130],"dev_man":[133],"dev_sge":[135],"test":[136],"sets,":[137],"respectively,":[138],"surpassing":[139],"state-of-the-art":[140],"methods.":[141],"Through":[142],"experiments,":[143],"found":[145],"that":[146,161],"method":[149],"significantly":[150],"improves":[151],"non-native":[155],"speech,":[159],"indicating":[160],"our":[162],"enables":[164],"Whisper":[165],"better":[167],"distinguish":[168],"between":[169],"languages.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
