{"id":"https://openalex.org/W6941233924","doi":"https://doi.org/10.1109/tencon61640.2024.10902805","title":"Significance of Corpus Quality for Direct Speech-to-Text Translation Systems","display_name":"Significance of Corpus Quality for Direct Speech-to-Text Translation Systems","publication_year":2024,"publication_date":"2024-12-01","ids":{"openalex":"https://openalex.org/W6941233924","doi":"https://doi.org/10.1109/tencon61640.2024.10902805"},"language":"en","primary_location":{"id":"doi:10.1109/tencon61640.2024.10902805","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tencon61640.2024.10902805","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"TENCON 2024 - 2024 IEEE Region 10 Conference (TENCON)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tonmoy Rajkhowa","orcid":null},"institutions":[{"id":"https://openalex.org/I56404289","display_name":"Indian Institute of Technology BHU","ror":"https://ror.org/01kh5gc44","country_code":"IN","type":"education","lineage":["https://openalex.org/I56404289"]},{"id":"https://openalex.org/I91357014","display_name":"Banaras Hindu University","ror":"https://ror.org/04cdn2797","country_code":"IN","type":"education","lineage":["https://openalex.org/I91357014"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Tonmoy Rajkhowa","raw_affiliation_strings":["Indian Institute of Technology (BHU),Dept. of Electronics Engineering,Varanasi,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology (BHU),Dept. of Electronics Engineering,Varanasi,India","institution_ids":["https://openalex.org/I56404289","https://openalex.org/I91357014"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Amartya Roy Chowdhury","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152718","display_name":"Indian Institute of Technology Dharwad","ror":"https://ror.org/0509djg30","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210152718"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Amartya Roy Chowdhury","raw_affiliation_strings":["Indian Institute of Technology,Dept. of EECE,Dharwad,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology,Dept. of EECE,Dharwad,India","institution_ids":["https://openalex.org/I4210152718"]}]},{"author_position":"last","author":{"id":null,"display_name":"Lokesh Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210152718","display_name":"Indian Institute of Technology Dharwad","ror":"https://ror.org/0509djg30","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210152718"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Lokesh Kumar","raw_affiliation_strings":["Indian Institute of Technology,Dept. of EECE,Dharwad,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology,Dept. of EECE,Dharwad,India","institution_ids":["https://openalex.org/I4210152718"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I56404289","https://openalex.org/I91357014"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.46394054,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1194","last_page":"1197"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T10451","display_name":"Mycorrhizal Fungi and Plant Interactions","score":0.4878000020980835,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10451","display_name":"Mycorrhizal Fungi and Plant Interactions","score":0.4878000020980835,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.12860000133514404,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10825","display_name":"Plant Pathogens and Fungal Diseases","score":0.05550000071525574,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.705299973487854},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.6614000201225281},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.589900016784668},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.459199994802475},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.4341999888420105},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.40950000286102295}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7803000211715698},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7346000075340271},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.705299973487854},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.6614000201225281},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6046000123023987},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.589900016784668},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.4341999888420105},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.40950000286102295},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.34279999136924744},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.31040000915527344},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2881999909877777},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C2780366754","wikidata":"https://www.wikidata.org/wiki/Q7494857","display_name":"Speech translation","level":3,"score":0.2736000120639801},{"id":"https://openalex.org/C135784402","wikidata":"https://www.wikidata.org/wiki/Q6958279","display_name":"Evaluation of machine translation","level":5,"score":0.2632000148296356},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.2621000111103058}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tencon61640.2024.10902805","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tencon61640.2024.10902805","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"TENCON 2024 - 2024 IEEE Region 10 Conference (TENCON)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2011168910","https://openalex.org/W2101105183","https://openalex.org/W2593011301","https://openalex.org/W2933138175","https://openalex.org/W2964172053","https://openalex.org/W3006988520","https://openalex.org/W3015698636","https://openalex.org/W3113676066","https://openalex.org/W3156404059","https://openalex.org/W3174032041","https://openalex.org/W3174446152","https://openalex.org/W3197771105","https://openalex.org/W4385245566","https://openalex.org/W4388854336","https://openalex.org/W6773238793","https://openalex.org/W6775053297","https://openalex.org/W6778411868","https://openalex.org/W6810354702","https://openalex.org/W6839510803","https://openalex.org/W6846372227"],"related_works":[],"abstract_inverted_index":{"Performance":[0],"improvement":[1],"in":[2,36,41,53,65,95],"Direct":[3],"Speech-to-":[4],"Text":[5],"Translation":[6],"systems":[7],"are":[8],"mainly":[9],"attributed":[10],"to":[11,20,56],"its":[12],"training":[13],"using":[14],"a":[15,75,78,92],"larger":[16,25],"corpora":[17,26,55],"which":[18],"led":[19],"the":[21,42,49,54,66,96],"development":[22],"of":[23,51,68],"many":[24],"containing":[27,77],"data":[28],"scrapped":[29],"from":[30],"various":[31],"online":[32],"sources,":[33],"This":[34],"resulted":[35],"quality":[37,52,61,86],"issues":[38],"and":[39,84,90],"impracticality":[40],"long":[43],"run.":[44],"Hence,":[45],"this":[46],"work":[47],"investigates":[48],"role":[50],"determine":[57],"whether":[58],"size":[59],"or":[60],"has":[62,91],"more":[63,88],"contribution":[64,94],"performance":[67],"these":[69],"systems.":[70],"Experimental":[71],"results":[72],"indicate":[73],"that":[74],"corpus":[76],"richer":[79],"vocabulary":[80],"with":[81],"better":[82],"translation":[83],"audio":[85],"is":[87],"effective":[89],"greater":[93],"performance.":[97]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
