{"id":"https://openalex.org/W3015591979","doi":"https://doi.org/10.1109/icassp40776.2020.9053752","title":"Addressing Accent Mismatch In Mandarin-English Code-Switching Speech Recognition","display_name":"Addressing Accent Mismatch In Mandarin-English Code-Switching Speech Recognition","publication_year":2020,"publication_date":"2020-04-09","ids":{"openalex":"https://openalex.org/W3015591979","doi":"https://doi.org/10.1109/icassp40776.2020.9053752","mag":"3015591979"},"language":"en","primary_location":{"id":"doi:10.1109/icassp40776.2020.9053752","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053752","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036359785","display_name":"Zhili Tan","orcid":"https://orcid.org/0000-0002-2445-5240"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhili Tan","raw_affiliation_strings":["Microsoft, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101423837","display_name":"Xinghua Fan","orcid":"https://orcid.org/0000-0003-0645-3214"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinghua Fan","raw_affiliation_strings":["Microsoft, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037426892","display_name":"Hui Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Zhu","raw_affiliation_strings":["Microsoft, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062230378","display_name":"Ed Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ed Lin","raw_affiliation_strings":["Microsoft, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5036359785"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":null,"apc_paid":null,"fwci":0.686,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.75959939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"8259","last_page":"8263"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8317563533782959},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7740451097488403},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.7603614330291748},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7233537435531616},{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.5901141166687012},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5369695425033569},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5092347860336304},{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.4700765907764435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4621274173259735},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.44851261377334595},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4466361403465271},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4395032525062561},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.33627742528915405},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.15172353386878967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8317563533782959},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7740451097488403},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.7603614330291748},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7233537435531616},{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.5901141166687012},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5369695425033569},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5092347860336304},{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.4700765907764435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4621274173259735},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.44851261377334595},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4466361403465271},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4395032525062561},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.33627742528915405},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.15172353386878967},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp40776.2020.9053752","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp40776.2020.9053752","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1593247906","https://openalex.org/W1989549063","https://openalex.org/W2102113734","https://openalex.org/W2102261232","https://openalex.org/W2127141656","https://openalex.org/W2130414229","https://openalex.org/W2147768505","https://openalex.org/W2160815625","https://openalex.org/W2327501763","https://openalex.org/W2631415506","https://openalex.org/W2883989418","https://openalex.org/W2899299209","https://openalex.org/W2937754410","https://openalex.org/W2939069254","https://openalex.org/W2939757332","https://openalex.org/W2944255943","https://openalex.org/W2962765220","https://openalex.org/W2963403868","https://openalex.org/W2964156075","https://openalex.org/W2972417954","https://openalex.org/W2972702443","https://openalex.org/W2973082572","https://openalex.org/W3103005696","https://openalex.org/W4385245566","https://openalex.org/W6675365184","https://openalex.org/W6739901393","https://openalex.org/W6755574360"],"related_works":["https://openalex.org/W2031478549","https://openalex.org/W3048205211","https://openalex.org/W2163874654","https://openalex.org/W3081187864","https://openalex.org/W4380605396","https://openalex.org/W2803306015","https://openalex.org/W3133352777","https://openalex.org/W151018310","https://openalex.org/W2784059283","https://openalex.org/W4319779560"],"abstract_inverted_index":{"Automatic":[0],"speech":[1,33,72],"recognition":[2,69,89],"systems":[3],"suffer":[4],"from":[5],"accuracy":[6],"degradation":[7],"when":[8],"code-switching":[9,44,79,88],"(multiple":[10],"languages":[11],"are":[12],"spoken":[13,46],"in":[14,93],"a":[15,30],"single":[16],"utterance)":[17],"is":[18,21,29],"encountered.":[19],"This":[20],"especially":[22],"common":[23],"for":[24],"non-native":[25],"speakers":[26,50],"where":[27],"there":[28],"mismatch":[31],"between":[32],"and":[34,51,62],"acoustic":[35],"model.":[36],"In":[37],"this":[38],"paper,":[39],"we":[40],"experiment":[41],"on":[42,77],"Mandarin-English":[43],"audio":[45],"by":[47],"native":[48],"Chinese":[49],"evaluate":[52],"three":[53],"techniques":[54],"to":[55,75,86],"improve":[56],"accuracy-data":[57],"adaptation,":[58],"individual":[59],"senone":[60],"modeling":[61],"lexicon":[63],"enrichment.":[64],"Our":[65],"results":[66],"show":[67],"the":[68],"of":[70],"accented":[71],"improves":[73],"up":[74],"12%":[76],"various":[78],"datasets.":[80],"We":[81],"also":[82],"propose":[83],"several":[84],"metrics":[85],"measure":[87],"quality,":[90],"not":[91],"captured":[92],"typical":[94],"word":[95],"error":[96],"rate":[97],"(WER)":[98],"measurement.":[99]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
