{"id":"https://openalex.org/W4392798757","doi":"https://doi.org/10.1145/3638584.3638634","title":"Vietnamese Voice2Text: A Web Application for Whisper Implementation in Vietnamese Automatic Speech Recognition Tasks: Vietnamese Voice2Text","display_name":"Vietnamese Voice2Text: A Web Application for Whisper Implementation in Vietnamese Automatic Speech Recognition Tasks: Vietnamese Voice2Text","publication_year":2023,"publication_date":"2023-12-08","ids":{"openalex":"https://openalex.org/W4392798757","doi":"https://doi.org/10.1145/3638584.3638634"},"language":"en","primary_location":{"id":"doi:10.1145/3638584.3638634","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3638584.3638634","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 7th International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103098718","display_name":"Quangphuoc Nguyen","orcid":"https://orcid.org/0009-0008-1333-1358"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Quangphuoc Nguyen","raw_affiliation_strings":["FPT University, Vietnam"],"raw_orcid":"https://orcid.org/0009-0008-1333-1358","affiliations":[{"raw_affiliation_string":"FPT University, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103028651","display_name":"Ngocminh Nguyen","orcid":"https://orcid.org/0009-0008-3015-4904"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Ngocminh Nguyen","raw_affiliation_strings":["FPT University, Vietnam"],"raw_orcid":"https://orcid.org/0009-0008-3015-4904","affiliations":[{"raw_affiliation_string":"FPT University, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050914404","display_name":"T.K. Dang","orcid":null},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Thanhluan Dang","raw_affiliation_strings":["FPT University, Vietnam"],"raw_orcid":"https://orcid.org/0009-0007-7775-5072","affiliations":[{"raw_affiliation_string":"FPT University, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077540441","display_name":"Vanha Tran","orcid":"https://orcid.org/0000-0003-2714-6707"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Vanha Tran","raw_affiliation_strings":["FPT University, Vietnam"],"raw_orcid":"https://orcid.org/0000-0003-2714-6707","affiliations":[{"raw_affiliation_string":"FPT University, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103098718"],"corresponding_institution_ids":["https://openalex.org/I109689652"],"apc_list":null,"apc_paid":null,"fwci":0.3408,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.68569142,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"312","last_page":"318"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vietnamese","display_name":"Vietnamese","score":0.986614465713501},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7929794788360596},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4889470338821411},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35787057876586914},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.349029004573822},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09391102194786072}],"concepts":[{"id":"https://openalex.org/C103621254","wikidata":"https://www.wikidata.org/wiki/Q9199","display_name":"Vietnamese","level":2,"score":0.986614465713501},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7929794788360596},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4889470338821411},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35787057876586914},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.349029004573822},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09391102194786072},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3638584.3638634","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3638584.3638634","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 7th International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2160815625","https://openalex.org/W2163929346","https://openalex.org/W3132191748","https://openalex.org/W3134751001","https://openalex.org/W4200068651","https://openalex.org/W4312782045","https://openalex.org/W4392788509"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2901286616","https://openalex.org/W2291461084","https://openalex.org/W2050882094","https://openalex.org/W3113733647","https://openalex.org/W4206924063","https://openalex.org/W3042674643","https://openalex.org/W1844055093","https://openalex.org/W61780229","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0,76,117,158],"publication":[1],"of":[2,13,202,247,260],"the":[3,11,58,65,103,144,168,173,184,187,199,208,218,225,230,237,241,245,256],"Whisper":[4,50],"model":[5,59],"by":[6,194],"OpenAI":[7],"inspired":[8],"us":[9],"with":[10,36,51,69,236],"idea":[12],"a":[14,33,61,84,125],"web":[15,34,77,118,169],"platform":[16],"that":[17,102,143,203,234],"provides":[18],"voice-to-text":[19],"conversion":[20],"services":[21],"for":[22,45,72,88,129,183,211,224],"Vietnamese":[23],"people.":[24],"Using":[25],"Whisper\u2019s":[26],"powerful":[27],"generalization":[28],"capabilities,":[29],"we":[30],"have":[31],"developed":[32,81,122],"application":[35,78,104,119,145,259],"three":[37,70,73],"main":[38],"features:":[39],"record-to-text,":[40],"file-to-text,":[41],"and":[42,109,115,150,156,171,178,196,229,254,258],"subtitles":[43,209],"generator":[44,210],"YouTube.":[46],"We":[47],"first":[48],"fine-tuned":[49],"our":[52],"target":[53],"language":[54],"dataset":[55],"then":[56,172],"deployed":[57],"as":[60,221],"Rest":[62],"API":[63],"using":[64,82,123],"Python":[66],"Flask":[67],"framework":[68],"paths":[71],"different":[74],"tasks.":[75],"has":[79,120],"been":[80,121],"ReactJS,":[83,124],"popular":[85,126],"JavaScript":[86,127],"library":[87,128],"building":[89,130],"user":[90,131],"interfaces.":[91,132],"Its":[92,133],"architecture":[93,134],"is":[94,105,135,146],"grounded":[95,136],"in":[96,137,267],"component-based":[97,138],"design":[98,139],"principles,":[99,140],"which":[100,141],"means":[101,142],"structured":[106,147],"into":[107,148],"reusable":[108,149],"modular":[110,151],"components,":[111,152],"enhancing":[112,153],"code":[113,154],"maintainability":[114,155],"scalability.":[116,157],"record-to-text":[159],"function":[160],"will":[161,175,189,197,232],"allow":[162],"users":[163,195,215],"to":[164,180,227,240],"record":[165],"audio":[166,174,191,204],"on":[167,244],"page,":[170],"be":[176],"processed":[177],"converted":[179],"text.":[181],"As":[182],"file-to-text":[185],"function,":[186,213],"website":[188,226,231],"receive":[190],"files":[192],"uploaded":[193],"return":[198],"transcript":[200,238],"text":[201],"file.":[205],"And":[206],"finally":[207],"YouTube":[212,219],"where":[214],"can":[216,252],"enter":[217],"link":[220],"input,":[222],"wait":[223],"process":[228],"display":[233],"video":[235,242],"attached":[239],"based":[243],"timestamps":[246],"each":[248],"transcript.":[249],"This":[250],"project":[251],"inspire":[253],"encourage":[255],"testing":[257],"new":[261],"automatic":[262],"speech":[263],"recognition":[264],"(ASR)":[265],"models":[266],"specific":[268],"applications.":[269]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
