{"id":"https://openalex.org/W4390096798","doi":"https://doi.org/10.1109/taslp.2023.3345150","title":"Universal Cross-Lingual Data Generation for Low Resource ASR","display_name":"Universal Cross-Lingual Data Generation for Low Resource ASR","publication_year":2023,"publication_date":"2023-12-22","ids":{"openalex":"https://openalex.org/W4390096798","doi":"https://doi.org/10.1109/taslp.2023.3345150"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3345150","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3345150","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103087387","display_name":"Wei Wang","orcid":"https://orcid.org/0009-0006-9498-1592"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wei Wang","raw_affiliation_strings":["Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100341993","display_name":"Yanmin Qian","orcid":"https://orcid.org/0000-0002-0314-3790"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanmin Qian","raw_affiliation_strings":["Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Auditory Cognition and Computational Acoustics Lab, the Department of Computer Science and Engineering and the MoE Key Laboratory of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5103087387"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.3491,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.67532553,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"973","last_page":"983"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7977719902992249},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6640368700027466},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5174544453620911},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.4861304759979248},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4788053631782532},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4757344722747803},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4575783610343933},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41782280802726746},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1341928243637085}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7977719902992249},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6640368700027466},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5174544453620911},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.4861304759979248},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4788053631782532},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4757344722747803},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4575783610343933},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41782280802726746},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1341928243637085},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3345150","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3345150","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W165119805","https://openalex.org/W2106440210","https://openalex.org/W2144499799","https://openalex.org/W2395387595","https://openalex.org/W2633221078","https://openalex.org/W2799800213","https://openalex.org/W2889028433","https://openalex.org/W2889385306","https://openalex.org/W2894835365","https://openalex.org/W2896457183","https://openalex.org/W2933138175","https://openalex.org/W2962699523","https://openalex.org/W2962704885","https://openalex.org/W2962780374","https://openalex.org/W2963292011","https://openalex.org/W2963303951","https://openalex.org/W2964309797","https://openalex.org/W2982223350","https://openalex.org/W2988736778","https://openalex.org/W3015419784","https://openalex.org/W3015585292","https://openalex.org/W3026041220","https://openalex.org/W3041561163","https://openalex.org/W3081416955","https://openalex.org/W3094667432","https://openalex.org/W3095184753","https://openalex.org/W3096338464","https://openalex.org/W3096485810","https://openalex.org/W3096710170","https://openalex.org/W3113594615","https://openalex.org/W3147900189","https://openalex.org/W3160799772","https://openalex.org/W3162244132","https://openalex.org/W3162425752","https://openalex.org/W3193461931","https://openalex.org/W3198429080","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3211278025","https://openalex.org/W3213029956","https://openalex.org/W4206662530","https://openalex.org/W4210463634","https://openalex.org/W4210811812","https://openalex.org/W4224919704","https://openalex.org/W4225272718","https://openalex.org/W4285144981","https://openalex.org/W4297841830","https://openalex.org/W4297841831","https://openalex.org/W4385245566","https://openalex.org/W6606618604","https://openalex.org/W6762242920","https://openalex.org/W6770514103","https://openalex.org/W6771467084","https://openalex.org/W6780218876","https://openalex.org/W6787141514","https://openalex.org/W6791904447","https://openalex.org/W6792680588","https://openalex.org/W6796464841","https://openalex.org/W6810007534","https://openalex.org/W6847363464"],"related_works":["https://openalex.org/W2389214306","https://openalex.org/W2965083567","https://openalex.org/W4235240664","https://openalex.org/W1838576100","https://openalex.org/W2757182831","https://openalex.org/W2095886385","https://openalex.org/W2089704382","https://openalex.org/W1983399550","https://openalex.org/W97075385","https://openalex.org/W2357523926"],"abstract_inverted_index":{"Significant":[0],"advances":[1],"in":[2,16,131,141,206,210],"end-to-end":[3],"(E2E)":[4],"automatic":[5],"speech":[6,44,78,91,98,128,200,204],"recognition":[7],"(ASR)":[8],"have":[9],"primarily":[10],"been":[11],"concentrated":[12],"on":[13,48,122,156],"languages":[14,24,95,169],"rich":[15],"annotated":[17],"data.":[18],"Nevertheless,":[19],"a":[20,42,118,176,183,197],"large":[21],"proportion":[22],"of":[23,113,136,192],"worldwide,":[25],"which":[26],"are":[27],"typically":[28],"low-resource,":[29],"continue":[30],"to":[31,126],"pose":[32],"significant":[33],"challenges.":[34],"To":[35,108],"address":[36],"this":[37,39,137],"issue,":[38],"study":[40],"presents":[41],"novel":[43],"synthesis":[45],"framework":[46,194],"based":[47,121],"data":[49,132,142,207],"splicing":[50,143],"that":[51,99],"leverages":[52],"self-supervised":[53],"learning":[54],"(SSL)":[55],"units":[56,72],"from":[57,93,105],"Hidden":[58],"Unit":[59],"BERT":[60],"(HuBERT)":[61],"as":[62,74],"universal":[63],"phonetic":[64,71],"units.":[65],"In":[66],"our":[67,193],"framework,":[68,115],"the":[69,111,114,127,157,190],"SSL":[70],"serve":[73],"crucial":[75],"bridges":[76],"between":[77],"and":[79,149,170],"text":[80,104],"across":[81],"different":[82],"languages.":[83,107],"By":[84],"leveraging":[85],"these":[86],"units,":[87],"we":[88,116,188],"successfully":[89],"splice":[90],"fragments":[92,205],"high-resource":[94],"into":[96],"synthesized":[97],"maintains":[100],"acoustic":[101],"coherence":[102],"with":[103],"low-resource":[106,185],"further":[109],"enhance":[110],"practicality":[112],"introduce":[117],"sampling":[119,139],"strategy":[120,140],"confidence":[123,138],"scores":[124],"assigned":[125],"segments":[129],"used":[130],"splicing.":[133],"The":[134],"application":[135],"significantly":[144],"accelerates":[145],"ASR":[146,152],"model":[147,179],"convergence":[148],"enhances":[150],"overall":[151],"performance.":[153],"Experimental":[154],"results":[155],"<sc":[158],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[159],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">CommonVoice</small>":[160],"dataset":[161],"show":[162],"25-35%":[163],"relative":[164,214],"improvement":[165],"for":[166,173,180,202],"four":[167],"Indo-European":[168],"about":[171],"20%":[172],"Turkish":[174],"using":[175],"4-gram":[177],"language":[178],"rescoring,":[181],"under":[182],"10-hour":[184],"setup.":[186],"Furthermore,":[187],"showcase":[189],"scalability":[191],"by":[195],"incorporating":[196],"larger":[198],"unsupervised":[199],"corpus":[201],"generating":[203],"splicing,":[208],"resulting":[209],"an":[211],"additional":[212],"10%":[213],"improvement.":[215]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
