{"id":"https://openalex.org/W4381389692","doi":"https://doi.org/10.1145/3587828.3587872","title":"Bangla-Wave: Improving Bangla Automatic Speech Recognition Utilizing N-gram Language Models","display_name":"Bangla-Wave: Improving Bangla Automatic Speech Recognition Utilizing N-gram Language Models","publication_year":2023,"publication_date":"2023-02-23","ids":{"openalex":"https://openalex.org/W4381389692","doi":"https://doi.org/10.1145/3587828.3587872"},"language":"en","primary_location":{"id":"doi:10.1145/3587828.3587872","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3587828.3587872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 12th International Conference on Software and Computer Applications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052030399","display_name":"Mohammed Rakib","orcid":"https://orcid.org/0000-0001-6201-3729"},"institutions":[{"id":"https://openalex.org/I157386601","display_name":"North South University","ror":"https://ror.org/05wdbfp45","country_code":"BD","type":"education","lineage":["https://openalex.org/I157386601"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Mohammed Rakib","raw_affiliation_strings":["Apurba-NSU R&amp;D Lab, North South University, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0001-6201-3729","affiliations":[{"raw_affiliation_string":"Apurba-NSU R&amp;D Lab, North South University, Bangladesh","institution_ids":["https://openalex.org/I157386601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087605642","display_name":"Md. Ismail Hossain","orcid":"https://orcid.org/0000-0002-4665-0530"},"institutions":[{"id":"https://openalex.org/I157386601","display_name":"North South University","ror":"https://ror.org/05wdbfp45","country_code":"BD","type":"education","lineage":["https://openalex.org/I157386601"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md. Ismail Hossain","raw_affiliation_strings":["Apurba-NSU R&amp;D Lab, North South University, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0002-4665-0530","affiliations":[{"raw_affiliation_string":"Apurba-NSU R&amp;D Lab, North South University, Bangladesh","institution_ids":["https://openalex.org/I157386601"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062072064","display_name":"Nabeel Mohammed","orcid":"https://orcid.org/0000-0002-7661-3570"},"institutions":[{"id":"https://openalex.org/I157386601","display_name":"North South University","ror":"https://ror.org/05wdbfp45","country_code":"BD","type":"education","lineage":["https://openalex.org/I157386601"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Nabeel Mohammed","raw_affiliation_strings":["Apurba-NSU R&amp;D Lab, North South University, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0002-7661-3570","affiliations":[{"raw_affiliation_string":"Apurba-NSU R&amp;D Lab, North South University, Bangladesh","institution_ids":["https://openalex.org/I157386601"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029426102","display_name":"Fuad Rahman","orcid":"https://orcid.org/0000-0002-8670-7124"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fuad Rahman","raw_affiliation_strings":["Apurba Technologies, USA"],"raw_orcid":"https://orcid.org/0000-0002-8670-7124","affiliations":[{"raw_affiliation_string":"Apurba Technologies, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5052030399"],"corresponding_institution_ids":["https://openalex.org/I157386601"],"apc_list":null,"apc_paid":null,"fwci":0.852,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.78197224,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"297","last_page":"301"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.9964333772659302},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8358510732650757},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6951430439949036},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6345605850219727},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6341258883476257},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5494314432144165},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.43541640043258667},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4193321168422699},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2701719105243683}],"concepts":[{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.9964333772659302},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8358510732650757},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6951430439949036},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6345605850219727},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6341258883476257},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5494314432144165},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.43541640043258667},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4193321168422699},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2701719105243683}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3587828.3587872","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3587828.3587872","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 12th International Conference on Software and Computer Applications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6899999976158142,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2158195707","https://openalex.org/W3036601975","https://openalex.org/W3099919888","https://openalex.org/W3139878283","https://openalex.org/W3176252609","https://openalex.org/W3200650723","https://openalex.org/W4283755617"],"related_works":["https://openalex.org/W2044408773","https://openalex.org/W2130717063","https://openalex.org/W2126322296","https://openalex.org/W2533508831","https://openalex.org/W4383605217","https://openalex.org/W2742699532","https://openalex.org/W4281389974","https://openalex.org/W1911859126","https://openalex.org/W2158598479","https://openalex.org/W1964178275"],"abstract_inverted_index":{"Although":[0],"over":[1],"300M":[2],"around":[3],"the":[4,27,30,56,66,73,87,98,132],"world":[5],"speak":[6],"Bangla,":[7],"scant":[8],"work":[9],"has":[10],"been":[11],"done":[12],"in":[13,65],"improving":[14],"Bangla":[15,20,125],"voice-to-text":[16],"transcription":[17],"due":[18],"to":[19,95,121],"being":[21],"a":[22,82,111,123],"low-resource":[23],"language.":[24],"However,":[25],"with":[26],"introduction":[28],"of":[29,49,100],"Bengali":[31,52,62,77],"Common":[32,53],"Voice":[33,54],"9.0":[34],"speech":[35,50,63],"dataset,":[36],"Automatic":[37],"Speech":[38],"Recognition":[39],"(ASR)":[40],"models":[41,79],"can":[42],"now":[43],"be":[44],"significantly":[45,96],"improved.":[46],"With":[47],"399hrs":[48],"recordings,":[51],"is":[55,129],"largest":[57],"and":[58,118],"most":[59],"diversified":[60],"open-source":[61],"corpus":[64],"world.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71,114],"outperform":[72],"State-of-the-Art":[74],"(SOTA)":[75],"pretrained":[76,83],"ASR":[78,102,126,134],"by":[80,104],"finetuning":[81],"wav2vec2":[84],"model":[85,103,109,127],"on":[86],"common":[88],"voice":[89],"dataset.":[90],"We":[91],"also":[92],"demonstrate":[93],"how":[94],"improve":[97],"performance":[99],"an":[101,106],"adding":[105],"n-gram":[107],"language":[108],"as":[110],"post-processor.":[112],"Finally,":[113],"do":[115],"some":[116],"experiments":[117],"hyperparameter":[119],"tuning":[120],"generate":[122],"robust":[124],"that":[128],"better":[130],"than":[131],"existing":[133],"models.":[135]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
