{"id":"https://openalex.org/W4412888920","doi":"https://doi.org/10.18653/v1/2025.findings-acl.75","title":"Leveraging Unit Language Guidance to Advance Speech Modeling in Textless Speech-to-Speech Translation","display_name":"Leveraging Unit Language Guidance to Advance Speech Modeling in Textless Speech-to-Speech Translation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888920","doi":"https://doi.org/10.18653/v1/2025.findings-acl.75"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.75","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.75","pdf_url":"https://aclanthology.org/2025.findings-acl.75.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.75.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100349339","display_name":"Yuhao Zhang","orcid":"https://orcid.org/0000-0001-7325-6507"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuhao Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078038985","display_name":"Xiangnan Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiangnan Ma","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005320321","display_name":"Kaiqi Kou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaiqi Kou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030745240","display_name":"Peizhuo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peizhuo Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065538178","display_name":"Weiqiao Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weiqiao Shan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057282504","display_name":"Benyou Wang","orcid":"https://orcid.org/0000-0002-1501-9914"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benyou Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600701","display_name":"Tong Xiao","orcid":"https://orcid.org/0000-0002-5842-6501"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tong Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102000307","display_name":"Yuxin Huang","orcid":"https://orcid.org/0000-0003-1277-6212"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuxin Huang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108312981","display_name":"Zhengtao Yu","orcid":"https://orcid.org/0000-0002-9525-9060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhengtao Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100370145","display_name":"Jingbo Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"JingBo Zhu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08636456,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1448","last_page":"1460"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9703999757766724,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9703999757766724,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9639000296592712,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9240000247955322,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.8251190185546875},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7325247526168823},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.5948792695999146},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5788597464561462},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5627329349517822},{"id":"https://openalex.org/keywords/unit","display_name":"Unit (ring theory)","score":0.4659389555454254},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.45031028985977173},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43933552503585815},{"id":"https://openalex.org/keywords/speech-production","display_name":"Speech production","score":0.4101521372795105},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4017584025859833},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.37075722217559814},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10494714975357056}],"concepts":[{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.8251190185546875},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7325247526168823},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.5948792695999146},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5788597464561462},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5627329349517822},{"id":"https://openalex.org/C122637931","wikidata":"https://www.wikidata.org/wiki/Q118084","display_name":"Unit (ring theory)","level":2,"score":0.4659389555454254},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.45031028985977173},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43933552503585815},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.4101521372795105},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4017584025859833},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.37075722217559814},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10494714975357056},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.75","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.75","pdf_url":"https://aclanthology.org/2025.findings-acl.75.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.75","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.75","pdf_url":"https://aclanthology.org/2025.findings-acl.75.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6000000238418579,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1187028490","display_name":null,"funder_award_id":"B16009","funder_id":"https://openalex.org/F4320323086","funder_display_name":"Natural Science Foundation of Liaoning Province"},{"id":"https://openalex.org/G5791456923","display_name":null,"funder_award_id":"B16009","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G6641886038","display_name":null,"funder_award_id":"B16009","funder_id":"https://openalex.org/F4320335785","funder_display_name":"Project 211"},{"id":"https://openalex.org/G7096855582","display_name":null,"funder_award_id":"U24A20334","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G969350000","display_name":null,"funder_award_id":"B16009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323086","display_name":"Natural Science Foundation of Liaoning Province","ror":null},{"id":"https://openalex.org/F4320335785","display_name":"Project 211","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888920.pdf","grobid_xml":"https://content.openalex.org/works/W4412888920.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4385571610","https://openalex.org/W2334129531","https://openalex.org/W39235475","https://openalex.org/W2969905756","https://openalex.org/W1540615732","https://openalex.org/W2515097069","https://openalex.org/W231741463","https://openalex.org/W4200068392","https://openalex.org/W4362498905","https://openalex.org/W2338806053"],"abstract_inverted_index":{"The":[0],"success":[1],"of":[2,34,107],"building":[3],"textless":[4],"speech-tospeech":[5],"translation":[6],"(S2ST)":[7],"models":[8,124],"has":[9],"attracted":[10],"much":[11],"attention.However,":[12],"S2ST":[13],"still":[14],"faces":[15],"two":[16,50],"main":[17],"challenges:":[18],"1)":[19],"extracting":[20],"linguistic":[21],"features":[22],"for":[23],"various":[24],"speech":[25,78],"signals,":[26],"called":[27,40],"crossmodal":[28],"(CM),":[29],"and":[30,89,119],"2)":[31],"learning":[32,69],"alignment":[33],"difference":[35],"languages":[36,92,106],"in":[37,75],"long":[38],"sequences,":[39],"cross-lingual":[41],"(CL).We":[42],"propose":[43,94],"the":[44,49,72,77,108],"unit":[45,53,73,91],"language":[46,54,65,74],"to":[47,70,98,123],"overcome":[48],"modeling":[51,79,97],"challenges.The":[52],"can":[55],"be":[56],"considered":[57],"a":[58,84,116],"text-like":[59],"representation":[60],"format,":[61],"constructed":[62],"using":[63],"n-gram":[64],"modeling.We":[66],"implement":[67],"multi-task":[68],"utilize":[71],"guiding":[76],"process.Our":[80],"initial":[81],"results":[82],"reveal":[83],"conflict":[85],"when":[86],"applying":[87],"source":[88],"target":[90],"simultaneously.We":[93],"task":[95],"prompt":[96],"mitigate":[99],"this":[100],"conflict.We":[101],"conduct":[102],"experiments":[103],"on":[104],"four":[105],"Voxpupil":[109],"dataset.Our":[110],"method":[111],"demonstrates":[112],"significant":[113],"improvements":[114],"over":[115],"strong":[117],"baseline":[118],"achieves":[120],"performance":[121],"comparable":[122],"trained":[125],"with":[126],"text.":[127]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
