{"id":"https://openalex.org/W4392902920","doi":"https://doi.org/10.1109/icassp48485.2024.10447494","title":"Soft Alignment of Modality Space for End-to-End Speech Translation","display_name":"Soft Alignment of Modality Space for End-to-End Speech Translation","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902920","doi":"https://doi.org/10.1109/icassp48485.2024.10447494"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100349352","display_name":"Yuhao Zhang","orcid":"https://orcid.org/0000-0002-9856-436X"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuhao Zhang","raw_affiliation_strings":["Northeastern University,School of Computer Science and Engineering,Shenyang,China","School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University,School of Computer Science and Engineering,Shenyang,China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005320321","display_name":"Kaiqi Kou","orcid":null},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaiqi Kou","raw_affiliation_strings":["Northeastern University,School of Computer Science and Engineering,Shenyang,China","School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University,School of Computer Science and Engineering,Shenyang,China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107028150","display_name":"Bei Li","orcid":"https://orcid.org/0000-0001-7617-9041"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bei Li","raw_affiliation_strings":["Northeastern University,School of Computer Science and Engineering,Shenyang,China","School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University,School of Computer Science and Engineering,Shenyang,China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100385729","display_name":"Chen Xu","orcid":"https://orcid.org/0000-0002-7238-7254"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Xu","raw_affiliation_strings":["Harbin Engineering University"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101997180","display_name":"Chunliang Zhang","orcid":"https://orcid.org/0000-0002-4382-2413"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunliang Zhang","raw_affiliation_strings":["Northeastern University,School of Computer Science and Engineering,Shenyang,China","School of Computer Science and Engineering, Northeastern University, Shenyang, China","NiuTrans Research, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University,School of Computer Science and Engineering,Shenyang,China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"NiuTrans Research, Shenyang, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600701","display_name":"Tong Xiao","orcid":"https://orcid.org/0000-0002-5842-6501"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tong Xiao","raw_affiliation_strings":["Northeastern University,School of Computer Science and Engineering,Shenyang,China","School of Computer Science and Engineering, Northeastern University, Shenyang, China","NiuTrans Research, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University,School of Computer Science and Engineering,Shenyang,China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"NiuTrans Research, Shenyang, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100370155","display_name":"Jingbo Zhu","orcid":"https://orcid.org/0000-0002-6537-7007"},"institutions":[{"id":"https://openalex.org/I9224756","display_name":"Northeastern University","ror":"https://ror.org/03awzbc87","country_code":"CN","type":"education","lineage":["https://openalex.org/I9224756"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingbo Zhu","raw_affiliation_strings":["Northeastern University,School of Computer Science and Engineering,Shenyang,China","NiuTrans Research, Shenyang, China","School of Computer Science and Engineering, Northeastern University, Shenyang, China"],"affiliations":[{"raw_affiliation_string":"Northeastern University,School of Computer Science and Engineering,Shenyang,China","institution_ids":["https://openalex.org/I9224756"]},{"raw_affiliation_string":"NiuTrans Research, Shenyang, China","institution_ids":[]},{"raw_affiliation_string":"School of Computer Science and Engineering, Northeastern University, Shenyang, China","institution_ids":["https://openalex.org/I9224756"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100349352"],"corresponding_institution_ids":["https://openalex.org/I9224756"],"apc_list":null,"apc_paid":null,"fwci":1.4504,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83380011,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"11041","last_page":"11045"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7912917137145996},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7204552888870239},{"id":"https://openalex.org/keywords/speech-translation","display_name":"Speech translation","score":0.6762118339538574},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6275542974472046},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6017192602157593},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5648013949394226},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5511967539787292},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5454226136207581},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5248979330062866},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.49609407782554626},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.48361697793006897},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4555046558380127},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.42567798495292664},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4108661413192749}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7912917137145996},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7204552888870239},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.6762118339538574},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6275542974472046},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6017192602157593},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5648013949394226},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5511967539787292},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5454226136207581},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5248979330062866},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.49609407782554626},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.48361697793006897},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4555046558380127},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.42567798495292664},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4108661413192749},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447494","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447494","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320323086","display_name":"Natural Science Foundation of Liaoning Province","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1731081199","https://openalex.org/W2127141656","https://openalex.org/W2187089797","https://openalex.org/W2466918907","https://openalex.org/W2582956876","https://openalex.org/W2933138175","https://openalex.org/W2963250244","https://openalex.org/W2963532001","https://openalex.org/W3096831136","https://openalex.org/W3113908264","https://openalex.org/W3162249256","https://openalex.org/W3173767661","https://openalex.org/W3176382501","https://openalex.org/W3176711365","https://openalex.org/W3196833881","https://openalex.org/W3209059054","https://openalex.org/W4221163209","https://openalex.org/W4287890956","https://openalex.org/W4382318914","https://openalex.org/W4385570757","https://openalex.org/W4385571132","https://openalex.org/W4385573012","https://openalex.org/W4385822841","https://openalex.org/W4389520404","https://openalex.org/W6637618735","https://openalex.org/W6760184523","https://openalex.org/W6780218876","https://openalex.org/W6784050962","https://openalex.org/W6789693907","https://openalex.org/W6849230176"],"related_works":["https://openalex.org/W2502115930","https://openalex.org/W2482350142","https://openalex.org/W4246396837","https://openalex.org/W3126451824","https://openalex.org/W2990025607","https://openalex.org/W3045103338","https://openalex.org/W3007142233","https://openalex.org/W4399356803","https://openalex.org/W3177132412","https://openalex.org/W3198731777"],"abstract_inverted_index":{"End-to-end":[0],"Speech":[1],"Translation":[2],"(ST)":[3],"aims":[4],"to":[5,59],"convert":[6],"speech":[7,19,39],"into":[8],"target":[9],"text":[10,21,41],"within":[11],"a":[12,69],"unified":[13],"model.":[14],"The":[15],"inherent":[16],"differences":[17],"between":[18],"and":[20,27,40,92],"modalities":[22],"often":[23],"impede":[24],"effective":[25],"cross-modal":[26],"cross-lingual":[28],"transfer.":[29],"Existing":[30],"methods":[31],"typically":[32],"employ":[33],"hard":[34],"alignment":[35],"(H-Align)":[36],"of":[37,64],"individual":[38,74],"segments,":[42],"which":[43],"can":[44],"degrade":[45],"textual":[46],"representations.":[47],"To":[48],"address":[49],"this,":[50],"we":[51],"introduce":[52],"Soft":[53],"Alignment":[54],"(S-Align),":[55],"using":[56],"adversarial":[57],"training":[58],"align":[60],"the":[61,82],"representation":[62],"spaces":[63],"both":[65],"modalities.":[66],"S-Align":[67,86],"creates":[68],"modality-invariant":[70],"space":[71],"while":[72],"preserving":[73],"modality":[75],"quality.":[76],"Experiments":[77],"on":[78,96],"three":[79],"languages":[80],"from":[81],"MuST-C":[83],"dataset":[84],"show":[85],"outperforms":[87],"H-Align":[88],"across":[89],"multiple":[90],"tasks":[91],"offers":[93],"translation":[94,100],"capabilities":[95],"par":[97],"with":[98],"specialized":[99],"models.":[101]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
