{"id":"https://openalex.org/W4307308046","doi":"https://doi.org/10.48550/arxiv.2210.12214","title":"Optimizing Bilingual Neural Transducer with Synthetic Code-switching Text Generation","display_name":"Optimizing Bilingual Neural Transducer with Synthetic Code-switching Text Generation","publication_year":2022,"publication_date":"2022-10-21","ids":{"openalex":"https://openalex.org/W4307308046","doi":"https://doi.org/10.48550/arxiv.2210.12214"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2210.12214","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.12214","pdf_url":"https://arxiv.org/pdf/2210.12214","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2210.12214","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026113034","display_name":"Thien Huu Nguyen","orcid":"https://orcid.org/0000-0003-3768-4736"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nguyen, Thien","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029122008","display_name":"Nathalie Tran","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tran, Nathalie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108296148","display_name":"Liuhui Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Liuhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038994899","display_name":"Thiago Fraga da Silva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"da Silva, Thiago Fraga","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025534909","display_name":"Matthew Radzihovsky","orcid":"https://orcid.org/0000-0002-0925-9161"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Radzihovsky, Matthew","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033764781","display_name":"Roger Hsiao","orcid":"https://orcid.org/0000-0001-8638-4508"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hsiao, Roger","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033446148","display_name":"H. Benjamin Mason","orcid":"https://orcid.org/0000-0003-4279-2854"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mason, Henry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083904404","display_name":"Stefan Braun","orcid":"https://orcid.org/0000-0002-7145-1103"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Braun, Stefan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030109648","display_name":"Erik McDermott","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McDermott, Erik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112207449","display_name":"Do\u011fan Can","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Can, Dogan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084169592","display_name":"Pawe\u0142 \u015awi\u0119toja\u0144ski","orcid":"https://orcid.org/0000-0001-5896-4505"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Swietojanski, Pawel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029549568","display_name":"Lyan Verwimp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Verwimp, Lyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024558318","display_name":"Sibel Oyman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oyman, Sibel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051482015","display_name":"Tresi Arvizo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arvizo, Tresi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050533390","display_name":"Honza Silovsky","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Silovsky, Honza","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016623654","display_name":"Arnab Ghoshal","orcid":"https://orcid.org/0000-0003-1272-2599"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghoshal, Arnab","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083290115","display_name":"Mathieu Martel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martel, Mathieu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018544671","display_name":"Bharat Ram Ambati","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ambati, Bharat Ram","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101736792","display_name":"Mohamed Ali","orcid":"https://orcid.org/0000-0002-1455-9679"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ali, Mohamed","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":19,"corresponding_author_ids":["https://openalex.org/A5026113034"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8126446604728699},{"id":"https://openalex.org/keywords/code-switching","display_name":"Code-switching","score":0.7948501706123352},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7177754044532776},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6390678286552429},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6240657567977905},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5728450417518616},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5681881308555603},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5489001870155334},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.5139100551605225},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4458836615085602},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.43389105796813965},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4147280156612396},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3462839126586914},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14713424444198608}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8126446604728699},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.7948501706123352},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7177754044532776},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6390678286552429},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6240657567977905},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5728450417518616},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5681881308555603},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5489001870155334},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.5139100551605225},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4458836615085602},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.43389105796813965},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4147280156612396},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3462839126586914},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14713424444198608},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2210.12214","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.12214","pdf_url":"https://arxiv.org/pdf/2210.12214","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2210.12214","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2210.12214","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2210.12214","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.12214","pdf_url":"https://arxiv.org/pdf/2210.12214","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.7699999809265137,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2990005675","https://openalex.org/W2374317326","https://openalex.org/W1603321096","https://openalex.org/W2394766824","https://openalex.org/W2078713291","https://openalex.org/W2361574037","https://openalex.org/W2386292991","https://openalex.org/W2364440891","https://openalex.org/W2393726922","https://openalex.org/W1577263324"],"abstract_inverted_index":{"Code-switching":[0],"describes":[1],"the":[2,11,37,40,60,72,91,105,113,120,129],"practice":[3],"of":[4,71],"using":[5],"more":[6],"than":[7],"one":[8],"language":[9],"in":[10],"same":[12],"sentence.":[13],"In":[14],"this":[15],"study,":[16],"we":[17,49],"investigate":[18],"how":[19,69],"to":[20,119],"optimize":[21],"a":[22],"neural":[23,73],"transducer":[24],"based":[25],"bilingual":[26,61],"automatic":[27],"speech":[28],"recognition":[29],"(ASR)":[30],"model":[31,42],"for":[32],"code-switching":[33,47,65,78,108],"speech.":[34,66],"Focusing":[35],"on":[36,64,90,104,128],"scenario":[38],"where":[39],"ASR":[41,62],"is":[43],"trained":[44],"without":[45],"supervised":[46],"data,":[48],"found":[50],"that":[51],"semi-supervised":[52],"training":[53],"and":[54,85],"synthetic":[55],"code-switched":[56],"data":[57,93],"can":[58],"improve":[59],"system":[63,89,97],"We":[67],"analyze":[68],"each":[70],"transducer's":[74],"encoders":[75],"contributes":[76],"towards":[77],"performance":[79],"by":[80,115],"measuring":[81],"encoder-specific":[82],"recall":[83],"values,":[84],"evaluate":[86],"our":[87],"English/Mandarin":[88,107],"ASCEND":[92,106],"set.":[94],"Our":[95],"final":[96],"achieves":[98],"25%":[99],"mixed":[100],"error":[101],"rate":[102],"(MER)":[103],"test":[109,131],"set":[110],"--":[111,123],"reducing":[112],"MER":[114],"2.1%":[116],"absolute":[117],"compared":[118],"previous":[121],"literature":[122],"while":[124],"maintaining":[125],"good":[126],"accuracy":[127],"monolingual":[130],"sets.":[132]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
