{"id":"https://openalex.org/W4389799531","doi":"https://doi.org/10.1109/taslp.2023.3343614","title":"Improving Speech Translation Accuracy and Time Efficiency With Fine-Tuned wav2vec 2.0-Based Speech Segmentation","display_name":"Improving Speech Translation Accuracy and Time Efficiency With Fine-Tuned wav2vec 2.0-Based Speech Segmentation","publication_year":2023,"publication_date":"2023-12-15","ids":{"openalex":"https://openalex.org/W4389799531","doi":"https://doi.org/10.1109/taslp.2023.3343614"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3343614","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3343614","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052080737","display_name":"Ryo Fukuda","orcid":"https://orcid.org/0009-0005-6213-3241"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and 
Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Ryo Fukuda","raw_affiliation_strings":["Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Science and Technology, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051961507","display_name":"Katsuhito Sudoh","orcid":"https://orcid.org/0000-0002-2122-9846"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Katsuhito Sudoh","raw_affiliation_strings":["Graduate School of Science and Technology and the Data Science Center, Nara Institute of Science and Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Science and Technology and the Data Science Center, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020994673","display_name":"Satoshi Nakamura","orcid":"https://orcid.org/0000-0001-6956-3803"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Satoshi Nakamura","raw_affiliation_strings":["Graduate School of Science and Technology and the Data Science Center, Nara Institute of Science and 
Technology, Ikoma, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Science and Technology and the Data Science Center, Nara Institute of Science and Technology, Ikoma, Japan","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052080737"],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":0.5245,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.72917451,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"906","last_page":"916"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical 
Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.8737479448318481},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6912857294082642},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6518082022666931},{"id":"https://openalex.org/keywords/speech-segmentation","display_name":"Speech segmentation","score":0.5633683204650879},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5296582579612732},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.5247911810874939},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4912716746330261},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4331270754337311},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.4295445680618286}],"concepts":[{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.8737479448318481},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6912857294082642},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech 
recognition","level":1,"score":0.6518082022666931},{"id":"https://openalex.org/C207030507","wikidata":"https://www.wikidata.org/wiki/Q2266173","display_name":"Speech segmentation","level":3,"score":0.5633683204650879},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5296582579612732},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.5247911810874939},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4912716746330261},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4331270754337311},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4295445680618286},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3343614","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3343614","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM 
Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5299999713897705}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W90695545","https://openalex.org/W1504308419","https://openalex.org/W2048978997","https://openalex.org/W2101105183","https://openalex.org/W2101446498","https://openalex.org/W2158678029","https://openalex.org/W2160538511","https://openalex.org/W2178770890","https://openalex.org/W2745785989","https://openalex.org/W2963532001","https://openalex.org/W2964199361","https://openalex.org/W2972448360","https://openalex.org/W3015383801","https://openalex.org/W3015698636","https://openalex.org/W3035252911","https://openalex.org/W3094903517","https://openalex.org/W3101096852","https://openalex.org/W3116524089","https://openalex.org/W3153896972","https://openalex.org/W3173657420","https://openalex.org/W3176711365","https://openalex.org/W3183148055","https://openalex.org/W3186672448","https://openalex.org/W3213029956","https://openalex.org/W4231547083","https://openalex.org/W4285201770","https://openalex.org/W4297841352","https://openalex.org/W4297841689","https://openalex.org/W6622919620","https://open
alex.org/W6628972281","https://openalex.org/W6684662674","https://openalex.org/W6685794611","https://openalex.org/W6712552552","https://openalex.org/W6714054690","https://openalex.org/W6744921104","https://openalex.org/W6759579507","https://openalex.org/W6761205521","https://openalex.org/W6768949332","https://openalex.org/W6780218876","https://openalex.org/W6784050962","https://openalex.org/W6794247771","https://openalex.org/W6802354036","https://openalex.org/W6802744804","https://openalex.org/W6839562487","https://openalex.org/W6898634591"],"related_works":["https://openalex.org/W2514679778","https://openalex.org/W3026276030","https://openalex.org/W2105626703","https://openalex.org/W111933330","https://openalex.org/W2026858810","https://openalex.org/W2144673858","https://openalex.org/W2074307126","https://openalex.org/W2372177018","https://openalex.org/W2159789522","https://openalex.org/W71758884"],"abstract_inverted_index":{"Speech":[0],"translation":[1,139,154,215],"(ST)":[2],"automatically":[3],"converts":[4],"utterances":[5],"in":[6,12,29],"a":[7,51,58,170],"source":[8],"language":[9],"into":[10,18,114,131,161],"text":[11],"another":[13],"language.":[14],"Splitting":[15],"continuous":[16],"speech":[17,23,60,113,160,193,202,214],"shorter":[19,133,162],"segments,":[20],"known":[21],"as":[22],"segmentation,":[24],"plays":[25],"an":[26],"important":[27],"role":[28],"ST.":[30],"Recent":[31],"segmentation":[32,38,52,91,172,194,203],"methods":[33],"trained":[34],"to":[35,94,104,151,166,217],"mimic":[36],"the":[37,72,80,123,175,207,210],"of":[39,71,116,178,213],"ST":[40,76,89,126,153],"corpora":[41],"have":[42],"surpassed":[43],"traditional":[44],"approaches.":[45],"Tsiamas":[46],"et":[47],"al.":[48],"[1]":[49],"proposed":[50],"frame":[53],"classifier":[54],"(SFC)":[55],"based":[56],"on":[57,107,183],"pre-trained":[59],"encoder":[61],"called":[62],"wav2vec":[63,188],"2.0.":[64],"Their":[65],"method,":[66],"named":[67],"SHAS,":[68],"retains":[69],"95-98%":[70],"BLEU":[7
3],"score":[74],"for":[75,125,191],"corpus":[77,90],"segmentation.":[78],"However,":[79],"segments":[81,115,136,163],"generated":[82],"by":[83,128,158],"SHAS":[84,150],"are":[85],"very":[86],"different":[87],"from":[88],"and":[92,141,156,186,209],"tend":[93],"be":[95],"longer":[96],"with":[97],"multiple":[98],"combined":[99],"utterances.":[100],"This":[101],"is":[102],"due":[103],"SHAS's":[105],"reliance":[106],"length":[108,119,184],"heuristics,":[109],"i.e.,":[110],"it":[111],"splits":[112],"easily":[117],"translatable":[118],"without":[120,181],"fully":[121],"considering":[122],"potential":[124],"improvement":[127],"splitting":[129,159],"them":[130],"even":[132],"segments.":[134],"Longer":[135],"often":[137],"degrade":[138],"quality":[140,208],"ST's":[142],"time":[143,211],"efficiency.":[144],"In":[145],"this":[146],"study,":[147],"we":[148],"extended":[149],"improve":[152],"accuracy":[155],"efficiency":[157,212],"that":[164,200],"correspond":[165],"sentences.":[167],"We":[168],"introduced":[169],"simple":[171],"algorithm":[173],"using":[174],"moving":[176],"average":[177],"SFC":[179],"predictions":[180],"relying":[182],"heuristics":[185],"explored":[187],"2.0":[189],"fine-tuning":[190],"improved":[192,206],"prediction.":[195],"Our":[196],"experimental":[197],"results":[198],"reveal":[199],"our":[201],"method":[204],"significantly":[205],"compared":[216],"SHAS.":[218]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
