{"id":"https://openalex.org/W7131834670","doi":"https://doi.org/10.48550/arxiv.2602.22522","title":"Efficient Dialect-Aware Modeling and Conditioning for Low-Resource Taiwanese Hakka Speech Processing","display_name":"Efficient Dialect-Aware Modeling and Conditioning for Low-Resource Taiwanese Hakka Speech Processing","publication_year":2026,"publication_date":"2026-02-26","ids":{"openalex":"https://openalex.org/W7131834670","doi":"https://doi.org/10.48550/arxiv.2602.22522"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.22522","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119719131","display_name":"An-Ci Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Peng, An-Ci","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101403221","display_name":"Kun Huang","orcid":"https://orcid.org/0000-0001-6507-4236"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Kuan-Tang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127262691","display_name":"Tien-Hong Lo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lo, Tien-Hong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127313615","display_name":"Hung-Shin Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Hung-Shin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033417091","display_name":"H. L. Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hsin-Min","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5127291421","display_name":"Berlin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Berlin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5119719131"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.3912999927997589,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.3912999927997589,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.3643999993801117,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.059700001031160355,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conflation","display_name":"Conflation","score":0.6868000030517578},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.5210000276565552},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.47679999470710754},{"id":"https://openalex.org/keywords/pinyin","display_name":"Pinyin","score":0.45100000500679016},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.38440001010894775},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.3824999928474426},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.3772999942302704},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.375900000333786}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7286999821662903},{"id":"https://openalex.org/C130440534","wikidata":"https://www.wikidata.org/wiki/Q14946528","display_name":"Conflation","level":2,"score":0.6868000030517578},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5439000129699707},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.5210000276565552},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5178999900817871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4934000074863434},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47679999470710754},{"id":"https://openalex.org/C2781095461","wikidata":"https://www.wikidata.org/wiki/Q42222","display_name":"Pinyin","level":3,"score":0.45100000500679016},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3824999928474426},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.3772999942302704},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.375900000333786},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.33009999990463257},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.3160000145435333},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.30550000071525574},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.2809000015258789},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2678000032901764},{"id":"https://openalex.org/C104122410","wikidata":"https://www.wikidata.org/wiki/Q1416406","display_name":"Network model","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.22522","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.22522","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.22522","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.22522","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8047822713851929,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Taiwanese":[0],"Hakka":[1,179],"is":[2,78,170],"a":[3,63,124,133],"low-resource,":[4],"endangered":[5],"language":[6],"that":[7,120,149],"poses":[8],"significant":[9],"challenges":[10],"for":[11],"automatic":[12],"speech":[13],"recognition":[14],"(ASR),":[15],"including":[16],"high":[17],"dialectal":[18,88,180],"variability":[19],"and":[20,29,54,101,116,154,162,184],"the":[21,68,79,95,105,128,138,145,171,176,185],"presence":[22],"of":[23,81,178,190],"two":[24],"distinct":[25],"writing":[26],"systems":[27],"(Hanzi":[28,115],"Pinyin).":[30,117],"Traditional":[31],"ASR":[32,114,140,183],"models":[33],"often":[34],"encounter":[35],"difficulties":[36],"in":[37,67],"this":[38,169],"context,":[39],"as":[40,132],"they":[41],"tend":[42],"to":[43,75,86,98,111,136],"conflate":[44],"essential":[45],"linguistic":[46,91],"content":[47],"with":[48],"dialect-specific":[49],"variations":[50,181],"across":[51],"both":[52],"phonological":[53],"lexical":[55],"dimensions.":[56],"To":[57,166],"address":[58],"these":[59,121,193],"challenges,":[60],"we":[61],"propose":[62],"unified":[64],"framework":[65,106],"grounded":[66],"Recurrent":[69],"Neural":[70],"Network":[71],"Transducers":[72],"(RNN-T).":[73],"Central":[74],"our":[76,150,167],"approach":[77],"introduction":[80],"dialect-aware":[82],"modeling":[83],"strategies":[84],"designed":[85],"disentangle":[87],"\"style\"":[89],"from":[90],"\"content\",":[92],"which":[93],"enhances":[94],"model's":[96],"capacity":[97],"learn":[99],"robust":[100],"generalized":[102],"representations.":[103],"Additionally,":[104],"employs":[107],"parameter-efficient":[108],"prediction":[109],"networks":[110],"concurrently":[112],"model":[113,151,188],"We":[118],"demonstrate":[119],"tasks":[122],"create":[123],"powerful":[125],"synergy,":[126],"wherein":[127],"cross-script":[129],"objective":[130],"serves":[131],"mutual":[134],"regularizer":[135],"improve":[137],"primary":[139],"tasks.":[141,194],"Experiments":[142],"conducted":[143],"on":[144,160,182],"HAT":[146],"corpus":[147],"reveal":[148],"achieves":[152],"57.00%":[153],"40.41%":[155],"relative":[156],"error":[157],"rate":[158],"reduction":[159],"Hanzi":[161],"Pinyin":[163],"ASR,":[164],"respectively.":[165],"knowledge,":[168],"first":[172,186],"systematic":[173],"investigation":[174],"into":[175],"impact":[177],"single":[187],"capable":[189],"jointly":[191],"addressing":[192]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-28T00:00:00"}
