{"id":"https://openalex.org/W4399240439","doi":"https://doi.org/10.1142/s2717554524500012","title":"Voices of the Himalayas: Benchmarking Speech Recognition Systems for the Tibetan Language","display_name":"Voices of the Himalayas: Benchmarking Speech Recognition Systems for the Tibetan Language","publication_year":2024,"publication_date":"2024-03-01","ids":{"openalex":"https://openalex.org/W4399240439","doi":"https://doi.org/10.1142/s2717554524500012"},"language":"en","primary_location":{"id":"doi:10.1142/s2717554524500012","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s2717554524500012","pdf_url":null,"source":{"id":"https://openalex.org/S4210231678","display_name":"International Journal of Asian Language Processing","issn_l":"2424-791X","issn":["2424-791X","2717-5545"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Asian Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053726259","display_name":"Sheng Li","orcid":"https://orcid.org/0000-0001-7636-3797"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Sheng Li","raw_affiliation_strings":["National Institute of Information and Communications Technology, Soraku-gun, Kyoto 619-0289, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology, Soraku-gun, Kyoto 619-0289, Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067208985","display_name":"Jiyi Li","orcid":"https://orcid.org/0000-0003-4997-3850"},"institutions":[{"id":"https://openalex.org/I66906201","display_name":"University of Yamanashi","ror":"https://ror.org/059x21724","country_code":"JP","type":"education","lineage":["https://openalex.org/I66906201"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jiyi Li","raw_affiliation_strings":["University of Yamanashi, Kofu, Japan"],"affiliations":[{"raw_affiliation_string":"University of Yamanashi, Kofu, Japan","institution_ids":["https://openalex.org/I66906201"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102757632","display_name":"Chenhui Chu","orcid":"https://orcid.org/0000-0001-9848-6384"},"institutions":[{"id":"https://openalex.org/I22299242","display_name":"Kyoto University","ror":"https://ror.org/02kpeqv85","country_code":"JP","type":"education","lineage":["https://openalex.org/I22299242"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Chenhui Chu","raw_affiliation_strings":["Graduate School of Informatics, Kyoto University, Sakyo-ku, Kyoto 606-8501, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Informatics, Kyoto University, Sakyo-ku, Kyoto 606-8501, Japan","institution_ids":["https://openalex.org/I22299242"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5053726259"],"corresponding_institution_ids":["https://openalex.org/I90023481"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05958827,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":"01","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.7648329734802246},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7588643431663513},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5416905879974365},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48195526003837585},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.4670366942882538},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.447033554315567},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.415641725063324},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37051165103912354},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.11297792196273804}],"concepts":[{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.7648329734802246},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7588643431663513},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5416905879974365},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48195526003837585},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.4670366942882538},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.447033554315567},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.415641725063324},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37051165103912354},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.11297792196273804},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1142/s2717554524500012","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s2717554524500012","pdf_url":null,"source":{"id":"https://openalex.org/S4210231678","display_name":"International Journal of Asian Language Processing","issn_l":"2424-791X","issn":["2424-791X","2717-5545"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Asian Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50728040","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100930618","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4390204761","display_name":null,"funder_award_id":"JP23K11227","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2085628288","https://openalex.org/W2122364000","https://openalex.org/W2125838338","https://openalex.org/W2127141656","https://openalex.org/W2147768505","https://openalex.org/W2327501763","https://openalex.org/W2395416438","https://openalex.org/W2573846420","https://openalex.org/W2595840341","https://openalex.org/W2613532868","https://openalex.org/W2627092829","https://openalex.org/W2766219058","https://openalex.org/W2797870159","https://openalex.org/W2889282842","https://openalex.org/W2890197052","https://openalex.org/W2892009249","https://openalex.org/W2962778134","https://openalex.org/W2962780374","https://openalex.org/W2963211739","https://openalex.org/W2972557594","https://openalex.org/W2972629567","https://openalex.org/W3011600413","https://openalex.org/W3012217511","https://openalex.org/W3015721889","https://openalex.org/W3198689362","https://openalex.org/W4206191520","https://openalex.org/W4296068994","https://openalex.org/W4310895557","https://openalex.org/W4313203122"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2136763963","https://openalex.org/W2109705048","https://openalex.org/W2940588515","https://openalex.org/W1909151225","https://openalex.org/W1987783679","https://openalex.org/W2160030256","https://openalex.org/W1521297879","https://openalex.org/W4253235840","https://openalex.org/W3151937861"],"abstract_inverted_index":{"The":[0],"Lhasa":[1,29,120],"dialect,":[2],"the":[3,28,60,68,80,84,91,100,119,162,166,177,189,210],"most":[4],"widely":[5],"spoken":[6],"Tibetan":[7,42],"dialect":[8,30],"in":[9,33,41,212],"Tibet,":[10],"is":[11,31],"also":[12],"renowned":[13],"for":[14,118,173],"its":[15],"rich":[16],"historical":[17],"archive":[18],"of":[19,62,70,86],"written":[20],"scripts.":[21],"Exploring":[22],"speech":[23,43,116],"recognition":[24,44,117,123],"methodologies":[25],"specific":[26],"to":[27,49,83,114,142,149],"paramount":[32],"safeguarding":[34],"Tibet\u2019s":[35],"distinct":[36],"linguistic":[37],"heritage.":[38],"Previous":[39],"studies":[40,75],"have":[45,76],"been":[46],"largely":[47],"confined":[48],"academic":[50],"research":[51],"using":[52,181],"nonpublic":[53],"datasets,":[54],"focusing":[55],"on":[56,198],"elements":[57],"such":[58],"as":[59],"selection":[61],"phone-level":[63],"acoustic":[64],"modeling":[65],"units":[66],"and":[67,147,160,191,196],"integration":[69],"tonal":[71],"information.":[72],"However,":[73],"these":[74,206],"not":[77],"significantly":[78],"benefited":[79],"community":[81],"due":[82],"scarcity":[85],"available":[87],"data.":[88],"To":[89],"mitigate":[90],"challenge":[92],"posed":[93],"by":[94,194],"limited":[95],"data":[96],"resources,":[97],"we":[98,183],"present":[99],"NICT-Tib1":[101],"(phase":[102],"1)":[103],"dataset,":[104,200],"a":[105],"new":[106],"open-source":[107],"dataset":[108],"collected":[109],"from":[110,132],"native":[111],"speakers":[112],"dedicated":[113],"investigating":[115],"dialect.":[121],"Speech":[122],"with":[124,135,165,188],"deep":[125],"neural":[126],"networks":[127],"(DNNs)":[128],"evolved":[129],"three":[130],"generations":[131],"systems":[133,144,153,187],"hybrid":[134],"hidden":[136],"Markov":[137],"model":[138],"(HMM)":[139],"(e.g.,":[140,145,154],"DNN-HMM)":[141],"End-to-End":[143],"Transformer),":[146],"finally":[148],"self-supervised":[150],"learning":[151],"(SSL)":[152],"Wav2Vec2.0),":[155],"each":[156],"generation":[157,168],"improving":[158],"accuracy":[159],"simplifying":[161],"training":[163],"process,":[164],"latest":[167],"achieving":[169],"state-of-the-art":[170,207],"performance,":[171],"especially":[172],"low-resource":[174],"languages.":[175],"Besides":[176],"early":[178],"DNN-HMM-based":[179],"system":[180],"Kaldi,":[182],"further":[184],"update":[185],"benchmark":[186],"Conformer":[190],"Wav2Vec2.0":[192],"trained":[193],"ESPnet":[195],"Huggingface":[197],"this":[199],"respectively.":[201],"Experimental":[202],"results":[203],"show":[204],"that":[205],"models":[208,211],"outperformed":[209],"previous":[213],"work.":[214]},"counts_by_year":[],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
