{"id":"https://openalex.org/W4319782197","doi":"https://doi.org/10.1109/iscslp57327.2022.10037997","title":"Towards Language-universal Mandarin-English Speech Recognition with Unsupervised Label Synchronous Adaptation","display_name":"Towards Language-universal Mandarin-English Speech Recognition with Unsupervised Label Synchronous Adaptation","publication_year":2022,"publication_date":"2022-12-11","ids":{"openalex":"https://openalex.org/W4319782197","doi":"https://doi.org/10.1109/iscslp57327.2022.10037997"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp57327.2022.10037997","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp57327.2022.10037997","pdf_url":null,"source":{"id":"https://openalex.org/S4363607181","display_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100448180","display_name":"Song Li","orcid":"https://orcid.org/0000-0001-5611-1517"},"institutions":[{"id":"https://openalex.org/I75867142","display_name":"Xiamen University of Technology","ror":"https://ror.org/01285e189","country_code":"CN","type":"education","lineage":["https://openalex.org/I75867142"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Song Li","raw_affiliation_strings":["Xiamen University,School of Electronic Science and Technology,China","School of Electronic Science and Technology, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Electronic Science and Technology,China","institution_ids":["https://openalex.org/I75867142"]},{"raw_affiliation_string":"School of Electronic Science and Technology, Xiamen University, China","institution_ids":["https://openalex.org/I75867142"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088820402","display_name":"Haoneng Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoneng Luo","raw_affiliation_strings":["Alibaba Group,Speech Lab","Speech Lab, Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group,Speech Lab","institution_ids":["https://openalex.org/I4210095624"]},{"raw_affiliation_string":"Speech Lab, Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034717070","display_name":"Wenxuan Hu","orcid":"https://orcid.org/0000-0003-1222-459X"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenxuan Hu","raw_affiliation_strings":["Xiamen University,School of Informatics,China","School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069339294","display_name":"Yuan Liu","orcid":"https://orcid.org/0000-0002-2578-0707"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuan Liu","raw_affiliation_strings":["Alibaba Group,Speech Lab","Speech Lab, Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group,Speech Lab","institution_ids":["https://openalex.org/I4210095624"]},{"raw_affiliation_string":"Speech Lab, Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101777591","display_name":"Shiliang Zhang","orcid":"https://orcid.org/0000-0002-9524-1602"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shiliang Zhang","raw_affiliation_strings":["Alibaba Group,Speech Lab","Speech Lab, Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group,Speech Lab","institution_ids":["https://openalex.org/I4210095624"]},{"raw_affiliation_string":"Speech Lab, Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412926","display_name":"Lin Li","orcid":"https://orcid.org/0000-0003-0426-6546"},"institutions":[{"id":"https://openalex.org/I75867142","display_name":"Xiamen University of Technology","ror":"https://ror.org/01285e189","country_code":"CN","type":"education","lineage":["https://openalex.org/I75867142"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Li","raw_affiliation_strings":["Xiamen University,School of Electronic Science and Technology,China","School of Electronic Science and Technology, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Electronic Science and Technology,China","institution_ids":["https://openalex.org/I75867142"]},{"raw_affiliation_string":"School of Electronic Science and Technology, Xiamen University, China","institution_ids":["https://openalex.org/I75867142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011997254","display_name":"Qingyang Hong","orcid":"https://orcid.org/0000-0001-7380-8690"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyang Hong","raw_affiliation_strings":["Xiamen University,School of Informatics,China","School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100448180"],"corresponding_institution_ids":["https://openalex.org/I75867142"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20458953,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"16","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8418881297111511},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7397346496582031},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7363322973251343},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.5873981714248657},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5070273876190186},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4926617741584778},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.47998473048210144},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.4701195955276489},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4506233334541321},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4485732316970825},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4262325167655945},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.41615790128707886},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12277346849441528},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10340768098831177}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8418881297111511},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7397346496582031},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7363322973251343},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.5873981714248657},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5070273876190186},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4926617741584778},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.47998473048210144},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.4701195955276489},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4506233334541321},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4485732316970825},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4262325167655945},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.41615790128707886},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12277346849441528},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10340768098831177},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp57327.2022.10037997","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp57327.2022.10037997","pdf_url":null,"source":{"id":"https://openalex.org/S4363607181","display_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5400000214576721,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1488156371","https://openalex.org/W1522301498","https://openalex.org/W1985204196","https://openalex.org/W2084543186","https://openalex.org/W2271840356","https://openalex.org/W2314030657","https://openalex.org/W2327501763","https://openalex.org/W2766219058","https://openalex.org/W2786835190","https://openalex.org/W2889068726","https://openalex.org/W2890964092","https://openalex.org/W2892009249","https://openalex.org/W2939757332","https://openalex.org/W2962824709","https://openalex.org/W2972417954","https://openalex.org/W3015457435","https://openalex.org/W3096032230","https://openalex.org/W3096122506","https://openalex.org/W3097306574","https://openalex.org/W3097338456","https://openalex.org/W3097580812","https://openalex.org/W3097777922","https://openalex.org/W3134568285","https://openalex.org/W3156323585","https://openalex.org/W3197932033","https://openalex.org/W4287726212","https://openalex.org/W4322588869","https://openalex.org/W4385245566","https://openalex.org/W4402504235","https://openalex.org/W6631190155","https://openalex.org/W6687566353","https://openalex.org/W6694517276","https://openalex.org/W6756358366","https://openalex.org/W6780612146"],"related_works":["https://openalex.org/W2374317326","https://openalex.org/W2990005675","https://openalex.org/W1603321096","https://openalex.org/W2394766824","https://openalex.org/W2078713291","https://openalex.org/W2361574037","https://openalex.org/W2386292991","https://openalex.org/W2364440891","https://openalex.org/W2393726922","https://openalex.org/W2366752344"],"abstract_inverted_index":{"End-to-end":[0],"multilingual":[1,24],"and":[2,25,28,56,113,136,142],"code-switching":[3,26,114,134],"speech":[4,39,52,78,90,104],"recognition":[5,79,91,105],"are":[6,11],"two":[7,44],"challenging":[8],"tasks":[9],"that":[10,102,119],"studied":[12],"separately":[13],"in":[14,86,132],"many":[15],"previous":[16],"works.":[17],"In":[18],"this":[19],"work,":[20],"we":[21,42],"jointly":[22],"study":[23],"problems":[27],"present":[29],"a":[30,129],"novel":[31],"unsupervised":[32],"label":[33],"synchronous":[34],"adaptation":[35],"algorithm":[36,122],"for":[37,98,110],"Mandarin-English":[38],"recognition.":[40],"Specifically,":[41],"use":[43],"parallel":[45,87],"encoders":[46],"to":[47,93],"decompose":[48],"the":[49,72,77,89,103,133,139],"Mel-spectrum":[50],"of":[51,67,76],"into":[53],"semantic":[54],"information":[55],"other":[57],"acoustic":[58],"attributes,":[59],"such":[60],"as":[61],"speaker":[62],"identity,":[63],"accents,":[64],"pronunciation":[65],"characteristics":[66],"different":[68],"languages,":[69],"etc.":[70],"During":[71],"autoregressive":[73],"decoding":[74],"process":[75],"system,":[80],"an":[81,95],"adaptive":[82,96,109],"decoder":[83,92],"is":[84],"used":[85],"with":[88],"generate":[94],"embedding":[97],"each":[99],"character,":[100],"so":[101],"model":[106],"can":[107],"be":[108],"Mandarin,":[111],"English,":[112],"cases.":[115],"Our":[116],"experiments":[117],"show":[118],"our":[120],"proposed":[121],"obtains":[123],"13.5%":[124],"relative":[125],"error":[126],"reduction":[127],"over":[128],"strong":[130],"baseline":[131],"case,":[135],"outperforms":[137],"both":[138],"state-of-the-art":[140],"Mandarin":[141],"English":[143],"monolingual":[144],"models.":[145]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
