{"id":"https://openalex.org/W4417337016","doi":"https://doi.org/10.1109/access.2025.3644647","title":"SAM Translator: Self-Paced Learning And Mixture-of-Experts for Cross-Lingual Text-to-Speech Translation","display_name":"SAM Translator: Self-Paced Learning And Mixture-of-Experts for Cross-Lingual Text-to-Speech Translation","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W4417337016","doi":"https://doi.org/10.1109/access.2025.3644647"},"language":null,"primary_location":{"id":"doi:10.1109/access.2025.3644647","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3644647","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3644647","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chung Tran","orcid":"https://orcid.org/0000-0003-1268-3630"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Chung Tran","raw_affiliation_strings":["Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2099144,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"2742","last_page":"2752"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3878999948501587,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3878999948501587,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.15060000121593475,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.08049999922513962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6284000277519226},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.6054999828338623},{"id":"https://openalex.org/keywords/welsh","display_name":"Welsh","score":0.5288000106811523},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5052000284194946},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.47099998593330383},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.44999998807907104},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4178999960422516},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4115000069141388}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8587999939918518},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6284000277519226},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6122000217437744},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.6054999828338623},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5472999811172485},{"id":"https://openalex.org/C2780769345","wikidata":"https://www.wikidata.org/wiki/Q9309","display_name":"Welsh","level":2,"score":0.5288000106811523},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5052000284194946},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.47099998593330383},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.44999998807907104},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4178999960422516},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4115000069141388},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.40299999713897705},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3257000148296356},{"id":"https://openalex.org/C622187","wikidata":"https://www.wikidata.org/wiki/Q3500773","display_name":"BLEU","level":3,"score":0.32019999623298645},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.30079999566078186},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2985684807","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Text generation","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2635999917984009}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/access.2025.3644647","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3644647","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3644647","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3644647","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1435354637","display_name":null,"funder_award_id":"JP23K21681","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G2526566797","display_name":null,"funder_award_id":"JP23K21681","funder_id":"https://openalex.org/F4320320212","funder_display_name":"Japan Society for the Promotion of Science London"},{"id":"https://openalex.org/G6790398888","display_name":null,"funder_award_id":"JP21H05054","funder_id":"https://openalex.org/F4320320212","funder_display_name":"Japan Society for the Promotion of Science London"},{"id":"https://openalex.org/G7893352204","display_name":null,"funder_award_id":"JP21H05054","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8355915826","display_name":null,"funder_award_id":"JPMJNX25C1","funder_id":"https://openalex.org/F4320320907","funder_display_name":"Japan Science and Technology Corporation"}],"funders":[{"id":"https://openalex.org/F4320320212","display_name":"Japan Society for the Promotion of Science London","ror":"https://ror.org/02m7axw05"},{"id":"https://openalex.org/F4320320907","display_name":"Japan Science and Technology Corporation","ror":"https://ror.org/00097mb19"},{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1600722501","https://openalex.org/W1935012542","https://openalex.org/W1978924416","https://openalex.org/W2150658333","https://openalex.org/W2561274697","https://openalex.org/W2964243274","https://openalex.org/W3001434439","https://openalex.org/W3174311593","https://openalex.org/W3174481817","https://openalex.org/W3196509775","https://openalex.org/W3209059054","https://openalex.org/W4386590854","https://openalex.org/W4390075359","https://openalex.org/W4409095723"],"related_works":[],"abstract_inverted_index":{"Most":[0],"existing":[1,30],"translation":[2,13],"studies":[3],"have":[4],"concentrated":[5],"on":[6,64,94,147,190],"text-to-text":[7],"or":[8],"speech-to-text":[9],"tasks,":[10],"whereas":[11],"text-to-speech":[12],"(T2ST)":[14],"remains":[15],"relatively":[16],"underexplored":[17],"despite":[18],"its":[19],"crucial":[20],"role":[21],"in":[22,40,142,214],"cultural":[23],"integration":[24],"and":[25,44,50,76,83,165,177,185],"education.":[26],"Among":[27],"the":[28,42,65,98,102,121,148,191,196,205,210],"few":[29],"T2ST":[31],"systems,":[32],"recent":[33],"end-to-end":[34],"approaches":[35],"still":[36],"face":[37],"significant":[38,173],"challenges":[39],"modeling":[41],"linguistic":[43,116],"acoustic":[45],"diversity":[46],"across":[47],"both":[48],"high-resource":[49,164],"low-resource":[51,122,168],"languages.":[52,169],"To":[53],"address":[54],"these":[55],"challenges,":[56],"we":[57,119],"propose":[58],"SAM-Translator,":[59],"a":[60,126],"novel":[61],"framework":[62],"based":[63,93],"mBART":[66],"model":[67,103],"that":[68,152],"incorporates":[69],"two":[70],"key":[71],"mechanisms:":[72],"Self-Paced":[73],"Learning":[74],"(SPL)":[75],"Mixture-of-Experts":[77],"(MoE).":[78],"SPL":[79],"enhances":[80],"training":[81,95],"efficiency":[82],"stability":[84],"by":[85],"progressively":[86],"introducing":[87],"samples":[88],"from":[89,136],"easy":[90],"to":[91,104,114,139],"difficult":[92],"loss.":[96],"Meanwhile,":[97],"MoE":[99],"mechanism":[100],"allows":[101],"dynamically":[105],"select":[106],"specialized":[107],"experts":[108],"for":[109,163,167,175],"prediction,":[110],"thereby":[111],"improving":[112],"adaptability":[113],"cross-lingual":[115],"variation.":[117],"Additionally,":[118],"tackle":[120],"language":[123],"problem":[124],"using":[125],"Data-Augmented":[127],"Self-Training":[128],"(DAST)":[129],"approach,":[130],"which":[131],"generates":[132],"synthetic":[133],"parallel":[134],"data":[135],"monolingual":[137],"corpora":[138],"boost":[140],"performance":[141],"data-scarce":[143],"scenarios.":[144],"Experimental":[145],"results":[146,208],"CVSS-C":[149],"benchmark":[150],"demonstrate":[151],"SAM-Translator":[153,213],"achieves":[154],"state-of-the-art":[155],"performance,":[156],"yielding":[157],"relative":[158,180],"BLEU":[159,181],"improvements":[160,182],"of":[161,183,212],"2\u20133%":[162],"7\u201310%":[166],"Remarkably,":[170],"DAST":[171],"brings":[172],"gains":[174],"Welsh":[176],"Mongolian,":[178],"with":[179,199],"21.55%":[184],"25.83%,":[186],"respectively.":[187],"Furthermore,":[188],"evaluations":[189],"large-scale":[192],"CoVoST-2":[193],"dataset":[194],"confirm":[195],"model\u2019s":[197],"robustness,":[198],"an":[200],"additional":[201],"4\u20136%":[202],"improvement":[203],"over":[204],"baseline.":[206],"These":[207],"underscore":[209],"effectiveness":[211],"advancing":[215],"multilingual":[216],"T2ST.":[217]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-15T00:00:00"}
