{"id":"https://openalex.org/W4372260133","doi":"https://doi.org/10.1109/icassp49357.2023.10095811","title":"Joint Training and Decoding for Multilingual End-to-End Simultaneous Speech Translation","display_name":"Joint Training and Decoding for Multilingual End-to-End Simultaneous Speech Translation","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372260133","doi":"https://doi.org/10.1109/icassp49357.2023.10095811"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095811","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095811","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043416325","display_name":"Wuwei Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I862669128","display_name":"Xiaomi (China)","ror":"https://ror.org/029f7bn57","country_code":"CN","type":"company","lineage":["https://openalex.org/I862669128"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wuwei Huang","raw_affiliation_strings":["Xiaomi AI Lab,Beijing,China","Xiaomi AI Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Xiaomi AI Lab,Beijing,China","institution_ids":["https://openalex.org/I862669128"]},{"raw_affiliation_string":"Xiaomi AI Lab, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074106607","display_name":"Renren Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Renren Jin","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin,China","College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100444393","display_name":"Wen Zhang","orcid":"https://orcid.org/0009-0003-1808-8741"},"institutions":[{"id":"https://openalex.org/I862669128","display_name":"Xiaomi (China)","ror":"https://ror.org/029f7bn57","country_code":"CN","type":"company","lineage":["https://openalex.org/I862669128"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Zhang","raw_affiliation_strings":["Xiaomi AI Lab,Beijing,China","Xiaomi AI Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Xiaomi AI Lab,Beijing,China","institution_ids":["https://openalex.org/I862669128"]},{"raw_affiliation_string":"Xiaomi AI Lab, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054843960","display_name":"Jian Luan","orcid":"https://orcid.org/0000-0002-2383-226X"},"institutions":[{"id":"https://openalex.org/I862669128","display_name":"Xiaomi (China)","ror":"https://ror.org/029f7bn57","country_code":"CN","type":"company","lineage":["https://openalex.org/I862669128"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Luan","raw_affiliation_strings":["Xiaomi AI Lab,Beijing,China","Xiaomi AI Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Xiaomi AI Lab,Beijing,China","institution_ids":["https://openalex.org/I862669128"]},{"raw_affiliation_string":"Xiaomi AI Lab, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102012403","display_name":"Bin Wang","orcid":"https://orcid.org/0000-0002-0006-2450"},"institutions":[{"id":"https://openalex.org/I862669128","display_name":"Xiaomi (China)","ror":"https://ror.org/029f7bn57","country_code":"CN","type":"company","lineage":["https://openalex.org/I862669128"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Wang","raw_affiliation_strings":["Xiaomi AI Lab,Beijing,China","Xiaomi AI Lab, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Xiaomi AI Lab,Beijing,China","institution_ids":["https://openalex.org/I862669128"]},{"raw_affiliation_string":"Xiaomi AI Lab, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055232825","display_name":"Deyi Xiong","orcid":"https://orcid.org/0000-0002-2353-5038"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Deyi Xiong","raw_affiliation_strings":["Tianjin University,College of Intelligence and Computing,Tianjin,China","College of Intelligence and Computing, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"Tianjin University,College of Intelligence and Computing,Tianjin,China","institution_ids":["https://openalex.org/I162868743"]},{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043416325"],"corresponding_institution_ids":["https://openalex.org/I862669128"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04026689,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8622767329216003},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.8176485896110535},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6420292854309082},{"id":"https://openalex.org/keywords/testbed","display_name":"Testbed","score":0.6191658973693848},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.6105890870094299},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.5944857597351074},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.5923004746437073},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.5758568048477173},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5741450786590576},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5166235566139221},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5046862363815308},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4524267911911011},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3759523332118988},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34029316902160645},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3223457932472229},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.13231393694877625},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.08893296122550964}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8622767329216003},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.8176485896110535},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6420292854309082},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.6191658973693848},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.6105890870094299},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.5944857597351074},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.5923004746437073},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.5758568048477173},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5741450786590576},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5166235566139221},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5046862363815308},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4524267911911011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3759523332118988},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34029316902160645},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3223457932472229},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.13231393694877625},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.08893296122550964},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095811","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10095811","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2106440210","https://openalex.org/W2419292002","https://openalex.org/W2605131327","https://openalex.org/W2949328740","https://openalex.org/W2951456627","https://openalex.org/W2952167535","https://openalex.org/W2963247703","https://openalex.org/W2964161387","https://openalex.org/W2972448360","https://openalex.org/W3008125272","https://openalex.org/W3008549139","https://openalex.org/W3034571331","https://openalex.org/W3034625919","https://openalex.org/W3096871117","https://openalex.org/W3102811925","https://openalex.org/W3105669983","https://openalex.org/W3162000275","https://openalex.org/W3173171878","https://openalex.org/W4224137820","https://openalex.org/W4300558631","https://openalex.org/W6717306297","https://openalex.org/W6732953234","https://openalex.org/W6784851026","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W2883256816","https://openalex.org/W2171408034","https://openalex.org/W3003320923","https://openalex.org/W2106140982","https://openalex.org/W2990025607","https://openalex.org/W3045103338","https://openalex.org/W3007142233","https://openalex.org/W4399356803","https://openalex.org/W3177132412","https://openalex.org/W3198731777"],"abstract_inverted_index":{"Recent":[0],"studies":[1],"on":[2,70,101],"end-to-end":[3,12,15,23,80],"speech":[4,25],"translation(ST)":[5],"have":[6],"facilitated":[7],"the":[8,71,96,102],"exploration":[9],"of":[10,98],"multilingual":[11,30,79],"ST":[13,81],"and":[14,46,107],"simultaneous":[16,24],"ST.":[17],"In":[18],"this":[19,55],"paper,":[20],"we":[21,64],"investigate":[22],"translation":[26],"in":[27,37,54],"a":[28,42,47,86],"one-to-many":[29],"setting":[31],"which":[32],"is":[33],"closer":[34],"to":[35,89],"applications":[36],"real":[38],"scenarios.":[39],"We":[40],"explore":[41,59],"separate":[43],"decoder":[44,74],"architecture":[45,49],"unified":[48,73],"for":[50],"joint":[51],"synchronous":[52],"training":[53,68],"scenario.":[56],"To":[57],"further":[58],"knowledge":[60],"transfer":[61],"across":[62],"languages,":[63],"propose":[65],"an":[66],"asynchronous":[67],"strategy":[69],"proposed":[72],"architecture.":[75],"A":[76],"multi-way":[77],"aligned":[78],"dataset":[82],"was":[83],"curated":[84],"as":[85],"benchmark":[87],"testbed":[88],"evaluate":[90],"our":[91,99],"methods.":[92],"Experimental":[93],"results":[94],"demonstrate":[95],"effectiveness":[97],"models":[100],"collected":[103],"dataset.":[104],"Our":[105],"codes":[106],"data":[108],"are":[109],"available":[110],"at:":[111],"https://github.com/XiaoMi/TED-MMST.":[112]},"counts_by_year":[],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
