{"id":"https://openalex.org/W4392902935","doi":"https://doi.org/10.1109/icassp48485.2024.10447450","title":"Memory-Augmented speech-to-text Translation with Multi-Scale Context Translation Strategy","display_name":"Memory-Augmented speech-to-text Translation with Multi-Scale Context Translation Strategy","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902935","doi":"https://doi.org/10.1109/icassp48485.2024.10447450"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103263121","display_name":"Yuxuan Yuan","orcid":"https://orcid.org/0009-0003-2850-4366"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxuan Yuan","raw_affiliation_strings":["Xiamen University,School of Informatics,Department of Artificial Intelligence,China","Department of Artificial Intelligence, School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Department of Artificial Intelligence,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"Department of Artificial Intelligence, School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100842413","display_name":"Yue Zhou","orcid":"https://orcid.org/0009-0007-4941-2099"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yue Zhou","raw_affiliation_strings":["Xiamen University,School of Informatics,Department of Artificial Intelligence,China","Department of Artificial Intelligence, School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Department of Artificial Intelligence,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"Department of Artificial Intelligence, School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021246376","display_name":"Xiaodong Shi","orcid":"https://orcid.org/0000-0002-8163-7139"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodong Shi","raw_affiliation_strings":["Xiamen University,School of Informatics,Department of Artificial Intelligence,China","Department of Artificial Intelligence, School of Informatics, Xiamen University, China","Key Laboratory of Digital Protection and Intelligent Processing of Intangible Cultural Heritage of Fujian and Taiwan, Ministry of Culture and Tourism, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,Department of Artificial Intelligence,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"Department of Artificial Intelligence, School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"Key Laboratory of Digital Protection and Intelligent Processing of Intangible Cultural Heritage of Fujian and Taiwan, Ministry of Culture and Tourism, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5103263121"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.73067678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"12727","last_page":"12731"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8542846441268921},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.7872229814529419},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7567316889762878},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.7289241552352905},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6173220276832581},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6150440573692322},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5903011560440063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5665291547775269},{"id":"https://openalex.org/keywords/example-based-machine-translation","display_name":"Example-based machine translation","score":0.5078433156013489},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4876750707626343}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8542846441268921},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.7872229814529419},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7567316889762878},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.7289241552352905},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6173220276832581},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6150440573692322},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5903011560440063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5665291547775269},{"id":"https://openalex.org/C24687705","wikidata":"https://www.wikidata.org/wiki/Q3753284","display_name":"Example-based machine translation","level":3,"score":0.5078433156013489},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4876750707626343},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2127141656","https://openalex.org/W2901389167","https://openalex.org/W2933138175","https://openalex.org/W2952446148","https://openalex.org/W2963250244","https://openalex.org/W2963532001","https://openalex.org/W2964110616","https://openalex.org/W3035252911","https://openalex.org/W3097301532","https://openalex.org/W3103878009","https://openalex.org/W3113676066","https://openalex.org/W3153583341","https://openalex.org/W3173657420","https://openalex.org/W3173691187","https://openalex.org/W3211978535","https://openalex.org/W4225318329","https://openalex.org/W4229016505","https://openalex.org/W4375869031","https://openalex.org/W6732953234","https://openalex.org/W6755207826","https://openalex.org/W6757094361","https://openalex.org/W6811049435","https://openalex.org/W6839510803","https://openalex.org/W6840426214"],"related_works":["https://openalex.org/W193726211","https://openalex.org/W2566847733","https://openalex.org/W2122287718","https://openalex.org/W2010336863","https://openalex.org/W2740094425","https://openalex.org/W3204448004","https://openalex.org/W2587602790","https://openalex.org/W3011059803","https://openalex.org/W4378619223","https://openalex.org/W2809655258"],"abstract_inverted_index":{"End-to-end":[0],"speech-to-text":[1,69],"translation":[2],"(ST)":[3],"has":[4,43],"demonstrated":[5],"promising":[6],"results":[7],"on":[8,55,109],"sentence-level":[9,125],"translation.":[10,24,79],"In":[11,63],"real-world":[12],"scenarios,":[13],"audio":[14],"is":[15],"typically":[16],"long":[17],"and":[18],"requires":[19],"cross-sentence":[20],"contextual":[21],"connections":[22],"for":[23,49],"Sentence-level":[25],"ST":[26,60],"models":[27],"are":[28],"facing":[29],"challenges":[30],"since":[31],"they":[32],"lack":[33],"the":[34,82,85,123],"ability":[35,83],"to":[36,46,76,88],"understand":[37],"inter-sentential":[38],"context.":[39,107],"As":[40],"context":[41,57],"information":[42,58,90],"been":[44],"proved":[45],"be":[47],"effective":[48],"document-level":[50],"machine":[51],"translation,":[52,70],"however,":[53],"research":[54],"incorporating":[56],"into":[59],"remains":[61],"under-explored.":[62],"this":[64],"paper,":[65],"we":[66,93],"propose":[67],"memory-augmented":[68],"which":[71],"leverages":[72],"a":[73],"memory":[74,86],"module":[75,87],"perform":[77],"context-aware":[78,120],"To":[80],"enhance":[81],"of":[84,106],"extract":[89],"from":[91],"context,":[92],"develop":[94],"Multi-Scale":[95],"Context":[96],"Translation":[97],"Strategy":[98],"(MSCTS)":[99],"that":[100,113],"translates":[101],"segments":[102],"with":[103],"different":[104],"size":[105],"Experiments":[108],"MuST-C":[110],"benchmark":[111],"show":[112],"our":[114],"proposed":[115],"method":[116],"can":[117],"significantly":[118],"improve":[119],"ST,":[121],"outperforming":[122],"strong":[124],"baseline":[126],"by":[127],"+0.8":[128],"BLEU":[129],"in":[130],"average.":[131]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
