{"id":"https://openalex.org/W3017535695","doi":"https://doi.org/10.1109/taslp.2020.2986886","title":"End-to-End Speech Translation With Transcoding by Multi-Task Learning for Distant Language Pairs","display_name":"End-to-End Speech Translation With Transcoding by Multi-Task Learning for Distant Language Pairs","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3017535695","doi":"https://doi.org/10.1109/taslp.2020.2986886","mag":"3017535695"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2020.2986886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2020.2986886","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://naist.repo.nii.ac.jp/record/4205/files/paper_202005_IEEE_TASLP_takatomo-k.paper_20200601_142022_Rk.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042592085","display_name":"Takatomo Kano","orcid":"https://orcid.org/0000-0001-9693-3785"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Takatomo Kano","raw_affiliation_strings":["Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]},{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan","RIKEN Center for Advanced Intelligence Project"],"affiliations":[{"raw_affiliation_string":"Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]},{"raw_affiliation_string":"RIKEN Center for Advanced Intelligence Project","institution_ids":["https://openalex.org/I4210126580"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]},{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":["Data Science Center and Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan","RIKEN Center for Advanced Intelligence Project"],"affiliations":[{"raw_affiliation_string":"Data Science Center and Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]},{"raw_affiliation_string":"RIKEN Center for Advanced Intelligence Project","institution_ids":["https://openalex.org/I4210126580"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5042592085"],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":4.2137,"has_fulltext":true,"cited_by_count":42,"citation_normalized_percentile":{"value":0.95102493,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"28","issue":null,"first_page":"1342","last_page":"1355"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8414000272750854},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.675288200378418},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5809743404388428},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5697956085205078},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5685070753097534},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5472738742828369},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4558694064617157},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.4368242621421814},{"id":"https://openalex.org/keywords/word-order","display_name":"Word order","score":0.4271831810474396}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8414000272750854},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.675288200378418},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5809743404388428},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5697956085205078},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5685070753097534},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5472738742828369},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4558694064617157},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.4368242621421814},{"id":"https://openalex.org/C70777604","wikidata":"https://www.wikidata.org/wiki/Q257885","display_name":"Word order","level":2,"score":0.4271831810474396}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2020.2986886","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2020.2986886","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:irdb.nii.ac.jp:01146:0005782656","is_oa":true,"landing_page_url":"https://naist.repo.nii.ac.jp/records/4205","pdf_url":"https://naist.repo.nii.ac.jp/record/4205/files/paper_202005_IEEE_TASLP_takatomo-k.paper_20200601_142022_Rk.pdf","source":{"id":"https://openalex.org/S7407056385","display_name":"Institutional Repositories DataBase (IRDB)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184597095","host_organization_name":"National Institute of Informatics","host_organization_lineage":["https://openalex.org/I184597095"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal article"},{"id":"pmh:oai:library.naist.jp:10061/13989","is_oa":true,"landing_page_url":"http://hdl.handle.net/10061/13989","pdf_url":null,"source":{"id":"https://openalex.org/S4377196843","display_name":"NAIST Digital Library (Nara Institute of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I75917431","host_organization_name":"Nara Institute of Science and Technology","host_organization_lineage":["https://openalex.org/I75917431"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:irdb.nii.ac.jp:01146:0005782656","is_oa":true,"landing_page_url":"https://naist.repo.nii.ac.jp/records/4205","pdf_url":"https://naist.repo.nii.ac.jp/record/4205/files/paper_202005_IEEE_TASLP_takatomo-k.paper_20200601_142022_Rk.pdf","source":{"id":"https://openalex.org/S7407056385","display_name":"Institutional Repositories DataBase (IRDB)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184597095","host_organization_name":"National Institute of Informatics","host_organization_lineage":["https://openalex.org/I184597095"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320324891","display_name":"Iran Telecommunication Research Center","ror":"https://ror.org/01a3g2z22"},{"id":"https://openalex.org/F4320335839","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3017535695.pdf","grobid_xml":"https://content.openalex.org/works/W3017535695.grobid-xml"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W206967138","https://openalex.org/W854541894","https://openalex.org/W1522301498","https://openalex.org/W1902237438","https://openalex.org/W1924770834","https://openalex.org/W1979356863","https://openalex.org/W2064675550","https://openalex.org/W2101105183","https://openalex.org/W2124807415","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2136545725","https://openalex.org/W2152834109","https://openalex.org/W2153653739","https://openalex.org/W2161742089","https://openalex.org/W2194775991","https://openalex.org/W2251912507","https://openalex.org/W2295730220","https://openalex.org/W2296073425","https://openalex.org/W2345837149","https://openalex.org/W2466918907","https://openalex.org/W2507561499","https://openalex.org/W2593011301","https://openalex.org/W2605131327","https://openalex.org/W2892009249","https://openalex.org/W2949328740","https://openalex.org/W2962784628","https://openalex.org/W2962965405","https://openalex.org/W2963403868","https://openalex.org/W2963609956","https://openalex.org/W2964121744","https://openalex.org/W2964308564","https://openalex.org/W4241645538","https://openalex.org/W4300558631","https://openalex.org/W4385245566","https://openalex.org/W6608432165","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6640212811","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6691654534","https://openalex.org/W6697245993","https://openalex.org/W6732953234","https://openalex.org/W6736180185","https://openalex.org/W6736996214","https://openalex.org/W6739901393","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W3011059803","https://openalex.org/W4301342010","https://openalex.org/W158465921","https://openalex.org/W3151736118","https://openalex.org/W4362495644","https://openalex.org/W2338806053","https://openalex.org/W4385571610","https://openalex.org/W2794347674","https://openalex.org/W2293738010","https://openalex.org/W4360995948"],"abstract_inverted_index":{"Directly":[0],"translating":[1],"spoken":[2],"utterances":[3,134,245],"from":[4,175,246],"a":[5,9,17,89,276],"source":[6],"language":[7,11,103,118,171,193],"to":[8,160,241],"target":[10],"is":[12,149],"challenging":[13],"because":[14],"it":[15],"requires":[16,124],"fundamental":[18],"transformation":[19],"in":[20,63],"both":[21],"linguistic":[22],"and":[23,39,53,86,108,152,188,212,268,281],"para/non-linguistic":[24],"features.":[25],"Traditional":[26],"speech-to-speech":[27],"translation":[28,37,82,123,142,166,209,226,272],"approaches":[29],"concatenate":[30],"automatic":[31],"speech":[32,122,133,141,208,225,244,271],"recognition":[33],"(ASR),":[34],"text-to-text":[35],"machine":[36],"(MT),":[38],"text-to-speech":[40],"synthesizer":[41],"(TTS)":[42],"by":[43,83,228],"text":[44,250],"information.":[45],"The":[46],"current":[47],"state-of-the-art":[48],"models":[49,97,267],"for":[50,138,223,238],"ASR,":[51],"MT,":[52],"TTS":[54,237],"have":[55,77],"mainly":[56],"been":[57,100],"built":[58],"using":[59],"deep":[60],"neural":[61,68],"networks,":[62],"particular,":[64],"an":[65,71,162],"attention-based":[66,198],"encoder-decoder":[67,199],"network":[69,222],"with":[70,131,210,264],"attention":[72],"mechanism.":[73],"Recently,":[74],"several":[75],"works":[76],"constructed":[78],"end-to-end":[79,140,163,207,224],"direct":[80,164,270],"speech-to-text":[81,165],"combining":[84],"ASR":[85],"MT":[87],"into":[88],"single":[90,277],"model.":[91],"However,":[92],"the":[93,157,180,197,221,230,247,257,269],"usefulness":[94],"of":[95,105],"these":[96],"has":[98],"only":[99],"investigated":[101],"on":[102,168,182,201],"pairs":[104,119,172],"similar":[106],"syntax":[107],"word":[109,126,186,191],"order":[110],"(e.g.,":[111,120],"English-French":[112],"or":[113,232],"English-Spanish).":[114],"For":[115],"syntactically":[116,169],"distant":[117,125,170],"English-Japanese),":[121],"reordering.":[127,177],"Furthermore,":[128],"parallel":[129,249],"texts":[130],"corresponding":[132,243],"that":[135,173,218,256,274],"are":[136,143],"suitable":[137],"training":[139],"generally":[144],"unavailable.":[145],"Collecting":[146],"such":[147],"corpora":[148],"usually":[150],"time-consuming":[151],"expensive.":[153],"This":[154],"article":[155],"proposes":[156],"first":[158],"attempt":[159],"build":[161],"system":[167],"suffer":[174],"long-distance":[176],"We":[178,235],"train":[179,220],"model":[181,200,278],"English":[183],"(subject-verb-object":[184],"(SVO)":[185],"order)":[187,192],"Japanese":[189],"(SOV":[190],"pairs.":[194],"To":[195],"guide":[196],"this":[202],"difficult":[203],"problem,":[204],"we":[205],"construct":[206],"transcoding":[211,280],"utilize":[213],"curriculum":[214],"learning":[215],"(CL)":[216],"strategies":[217],"gradually":[219],"tasks":[227],"adapting":[229],"decoder":[231],"encoder":[233],"parts.":[234],"use":[236],"data":[239],"augmentation":[240],"generate":[242],"existing":[248],"data.":[251],"Our":[252],"experiment":[253],"results":[254],"show":[255],"proposed":[258],"approach":[259,273],"provides":[260],"significant":[261],"improvements":[262],"compared":[263],"conventional":[265],"cascade":[266],"uses":[275],"without":[279],"CL":[282],"strategies.":[283]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":4}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
