{"id":"https://openalex.org/W2972495969","doi":"https://doi.org/10.21437/interspeech.2019-1951","title":"Direct Speech-to-Speech Translation with a Sequence-to-Sequence Model","display_name":"Direct Speech-to-Speech Translation with a Sequence-to-Sequence Model","publication_year":2019,"publication_date":"2019-09-13","ids":{"openalex":"https://openalex.org/W2972495969","doi":"https://doi.org/10.21437/interspeech.2019-1951","mag":"2972495969"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2019-1951","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1951","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102792436","display_name":"Jia Ye","orcid":"https://orcid.org/0000-0002-8000-4911"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ye Jia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103273436","display_name":"Ron J. Weiss","orcid":"https://orcid.org/0000-0003-2010-4053"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ron J. Weiss","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055592425","display_name":"Fadi Biadsy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fadi Biadsy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018646392","display_name":"Wolfgang Macherey","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wolfgang Macherey","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076150774","display_name":"Melvin Johnson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Melvin Johnson","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100715424","display_name":"Zhifeng Chen","orcid":"https://orcid.org/0000-0001-8631-2424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhifeng Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5010253402","display_name":"Yonghui Wu","orcid":"https://orcid.org/0000-0002-6780-6135"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yonghui Wu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102792436"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":11.8519,"has_fulltext":false,"cited_by_count":168,"citation_normalized_percentile":{"value":0.98791772,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1123","last_page":"1127"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.7352696061134338},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.6960091590881348},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6928329467773438},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6495486497879028},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5891033411026001},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5366937518119812},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5229471325874329},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4194498658180237},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.397433340549469},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.06651648879051208}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.7352696061134338},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.6960091590881348},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6928329467773438},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6495486497879028},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5891033411026001},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5366937518119812},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5229471325874329},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4194498658180237},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.397433340549469},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06651648879051208},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2019-1951","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2019-1951","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2019","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320317153","display_name":"DeepMind","ror":"https://ror.org/00971b260"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1537859740","https://openalex.org/W1538023239","https://openalex.org/W1962947832","https://openalex.org/W2011783148","https://openalex.org/W2097203679","https://openalex.org/W2113106066","https://openalex.org/W2120847449","https://openalex.org/W2133300417","https://openalex.org/W2133564696","https://openalex.org/W2136545725","https://openalex.org/W2139647714","https://openalex.org/W2152834109","https://openalex.org/W2525778437","https://openalex.org/W2605131327","https://openalex.org/W2747920239","https://openalex.org/W2788357188","https://openalex.org/W2794490148","https://openalex.org/W2795581297","https://openalex.org/W2808706139","https://openalex.org/W2892620417","https://openalex.org/W2896538040","https://openalex.org/W2912492482","https://openalex.org/W2928941594","https://openalex.org/W2941115821","https://openalex.org/W2949328740","https://openalex.org/W2962824709","https://openalex.org/W2963011080","https://openalex.org/W2963609956","https://openalex.org/W2963779652","https://openalex.org/W2964138190","https://openalex.org/W2964243274","https://openalex.org/W2972970915","https://openalex.org/W3012492057","https://openalex.org/W4289383906","https://openalex.org/W4293569541","https://openalex.org/W4293714597","https://openalex.org/W4294619240","https://openalex.org/W4298174729","https://openalex.org/W4298580827","https://openalex.org/W4300558631","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2610387714","https://openalex.org/W1512718085","https://openalex.org/W1484029852","https://openalex.org/W4379525811","https://openalex.org/W2167662847","https://openalex.org/W2186089609","https://openalex.org/W2178499706","https://openalex.org/W2405158679","https://openalex.org/W4385570101","https://openalex.org/W3216100938"],"abstract_inverted_index":{"We":[0],"present":[1],"an":[2,23],"attention-based":[3],"sequence-to-sequence":[4],"neural":[5],"network":[6,27],"which":[7],"can":[8],"directly":[9],"translate":[10],"speech":[11,16,34,59,72],"from":[12],"one":[13],"language":[14],"into":[15,36],"in":[17,39],"another":[18,40],"language,":[19,41],"without":[20],"relying":[21],"on":[22,69,103],"intermediate":[24],"text":[25],"representation.The":[26],"is":[28],"trained":[29],"end-to-end,":[30],"learning":[31],"to":[32,43,56],"map":[33],"spectrograms":[35,38],"target":[37],"corresponding":[42],"the":[44,54,61,64,78,98,101],"translated":[45,58],"content":[46],"(in":[47],"a":[48,83,87,93],"different":[49],"canonical":[50],"voice).We":[51],"further":[52],"demonstrate":[53],"ability":[55],"synthesize":[57],"using":[60],"voice":[62],"of":[63,86,100],"source":[65],"speaker.We":[66],"conduct":[67],"experiments":[68],"two":[70],"Spanish-to-English":[71],"translation":[73,90],"datasets,":[74],"and":[75,92],"find":[76],"that":[77],"proposed":[79],"model":[80,91],"slightly":[81],"underperforms":[82],"baseline":[84],"cascade":[85],"direct":[88],"speech-to-text":[89],"text-to-speech":[94],"synthesis":[95],"model,":[96],"demonstrating":[97],"feasibility":[99],"approach":[102],"this":[104],"very":[105],"challenging":[106],"task.":[107]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":20},{"year":2024,"cited_by_count":19},{"year":2023,"cited_by_count":38},{"year":2022,"cited_by_count":16},{"year":2021,"cited_by_count":31},{"year":2020,"cited_by_count":23},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":2}],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
