{"id":"https://openalex.org/W4408354333","doi":"https://doi.org/10.1109/icassp49660.2025.10890008","title":"Zero-resource Speech Translation and Recognition with LLMs","display_name":"Zero-resource Speech Translation and Recognition with LLMs","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354333","doi":"https://doi.org/10.1109/icassp49660.2025.10890008"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890008","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021276950","display_name":"Karel Mundnich","orcid":"https://orcid.org/0000-0003-4249-8356"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Karel Mundnich","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069475690","display_name":"Xing Niu","orcid":"https://orcid.org/0000-0001-8834-792X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xing Niu","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057049978","display_name":"Prashant Mathur","orcid":"https://orcid.org/0000-0002-9271-1373"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Prashant Mathur","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004616142","display_name":"Srikanth Ronanki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srikanth Ronanki","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019745645","display_name":"B. Houston","orcid":"https://orcid.org/0000-0001-5666-3629"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brady Houston","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093219929","display_name":"Veera Raghavendra Elluru","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Veera Raghavendra Elluru","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036502813","display_name":"Nilaksh Das","orcid":"https://orcid.org/0000-0002-5281-5549"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nilaksh Das","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055234106","display_name":"Zejiang Hou","orcid":"https://orcid.org/0000-0002-6836-8288"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zejiang Hou","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058343897","display_name":"Goeric Huybrechts","orcid":"https://orcid.org/0000-0003-0222-3008"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goeric Huybrechts","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109774129","display_name":"Anshu Bhatia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anshu Bhatia","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000143881","display_name":"Daniel Garcia-Romero","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daniel Garcia-Romero","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106859201","display_name":"Kyu J. Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kyu J. Han","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050325468","display_name":"Katrin Kirchhoff","orcid":"https://orcid.org/0000-0002-6645-6030"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katrin Kirchhoff","raw_affiliation_strings":["AWS AI Labs"],"affiliations":[{"raw_affiliation_string":"AWS AI Labs","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5021276950"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.3637,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93381569,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9018999934196472,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.6575740575790405},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6092551350593567},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6059383749961853},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5710652470588684},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.4711531400680542},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3688267469406128},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3271269202232361},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.09747767448425293},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.08052089810371399}],"concepts":[{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.6575740575790405},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6092551350593567},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6059383749961853},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5710652470588684},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.4711531400680542},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3688267469406128},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3271269202232361},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.09747767448425293},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.08052089810371399},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890008","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890008","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W2963532001","https://openalex.org/W2979826702","https://openalex.org/W3015698636","https://openalex.org/W3095410713","https://openalex.org/W3097777922","https://openalex.org/W3119308075","https://openalex.org/W3169483174","https://openalex.org/W3196509775","https://openalex.org/W3214697273","https://openalex.org/W4295308567","https://openalex.org/W4297841687","https://openalex.org/W4319862635","https://openalex.org/W4385571124","https://openalex.org/W4385572710","https://openalex.org/W4390041933","https://openalex.org/W4391021623","https://openalex.org/W4392902792","https://openalex.org/W4392903956","https://openalex.org/W4402670519","https://openalex.org/W6757817989","https://openalex.org/W6769627184","https://openalex.org/W6771467084","https://openalex.org/W6780218876","https://openalex.org/W6796581206","https://openalex.org/W6810673746","https://openalex.org/W6838865847","https://openalex.org/W6853249747","https://openalex.org/W6853998256","https://openalex.org/W6859099255","https://openalex.org/W6866575372"],"related_works":["https://openalex.org/W3013650182","https://openalex.org/W2989283631","https://openalex.org/W4249605382","https://openalex.org/W4313491656","https://openalex.org/W3279617","https://openalex.org/W4402958497","https://openalex.org/W1991183963","https://openalex.org/W2250701745","https://openalex.org/W2053087750","https://openalex.org/W2146390824"],"abstract_inverted_index":{"Despite":[0],"recent":[1],"advancements":[2],"in":[3,36,84,105,122,130,159],"speech":[4,7,12,56],"processing,":[5],"zero-resource":[6],"translation":[8],"(ST)":[9],"and":[10,34,61,86,96],"automatic":[11],"recognition":[13],"(ASR)":[14],"remain":[15],"challenging":[16],"problems.":[17],"In":[18,109],"this":[19,50],"work,":[20],"we":[21,132],"propose":[22],"to":[23,31,71,88,91,116,137,156],"leverage":[24],"a":[25,53,58,62],"multilingual":[26,55,59],"Large":[27],"Language":[28],"Model":[29],"(LLM)":[30],"perform":[32,80],"ST":[33,85],"ASR":[35,87],"languages":[37],"for":[38,124],"which":[39],"the":[40,68,72,77,94,100,143,151,154,160],"model":[41,95,113],"has":[42,99],"never":[43],"seen":[44],"paired":[45],"audio-text":[46],"data.":[47],"We":[48,79,139],"achieve":[49,117,133],"by":[51,150],"using":[52],"pre-trained":[54],"encoder,":[57],"LLM,":[60],"lightweight":[63],"adaptation":[64],"module":[65],"that":[66,142],"maps":[67],"audio":[69],"representations":[70],"token":[73],"embedding":[74],"space":[75],"of":[76,135,145,153],"LLM.":[78],"several":[81],"experiments":[82],"both":[83],"understand":[89],"how":[90],"best":[92,112],"train":[93],"what":[97],"data":[98],"most":[101],"impact":[102],"on":[103],"performance":[104,144],"previously":[106,126],"unseen":[107,127],"languages.":[108],"ST,":[110],"our":[111,146],"is":[114,148],"capable":[115],"BLEU":[118],"scores":[119],"over":[120],"23":[121],"CoVoST2":[123],"two":[125],"languages,":[128],"while":[129],"ASR,":[131],"WERs":[134],"up":[136],"28.2%.":[138],"finally":[140],"show":[141],"system":[147],"bounded":[149],"ability":[152],"LLM":[155],"output":[157],"text":[158],"desired":[161],"language.":[162]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
