{"id":"https://openalex.org/W4410770318","doi":"https://doi.org/10.1109/access.2025.3574234","title":"An Analytical Review of Preprocessing Techniques in Bengali Natural Language Processing","display_name":"An Analytical Review of Preprocessing Techniques in Bengali Natural Language Processing","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4410770318","doi":"https://doi.org/10.1109/access.2025.3574234"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3574234","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3574234","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3574234","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103049423","display_name":"Sovon Chakraborty","orcid":"https://orcid.org/0000-0003-0765-1060"},"institutions":[{"id":"https://openalex.org/I63169043","display_name":"United International University","ror":"https://ror.org/01tqv1p28","country_code":"BD","type":"education","lineage":["https://openalex.org/I63169043"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Sovon Chakraborty","raw_affiliation_strings":["C2SG Research Group, United International University, Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0003-0765-1060","affiliations":[{"raw_affiliation_string":"C2SG Research Group, United International University, Dhaka, Bangladesh","institution_ids":["https://openalex.org/I63169043"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111118639","display_name":"Protiva Das","orcid":null},"institutions":[{"id":"https://openalex.org/I63169043","display_name":"United International University","ror":"https://ror.org/01tqv1p28","country_code":"BD","type":"education","lineage":["https://openalex.org/I63169043"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Protiva Das","raw_affiliation_strings":["C2SG Research Group, United International University, Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0009-0003-4705-9229","affiliations":[{"raw_affiliation_string":"C2SG Research Group, United International University, Dhaka, Bangladesh","institution_ids":["https://openalex.org/I63169043"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036437660","display_name":"Shakib Mahmud Dipto","orcid":"https://orcid.org/0000-0003-2704-118X"},"institutions":[{"id":"https://openalex.org/I63169043","display_name":"United International University","ror":"https://ror.org/01tqv1p28","country_code":"BD","type":"education","lineage":["https://openalex.org/I63169043"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Shakib Mahmud Dipto","raw_affiliation_strings":["C2SG Research Group, United International University, Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0003-2704-118X","affiliations":[{"raw_affiliation_string":"C2SG Research Group, United International University, Dhaka, Bangladesh","institution_ids":["https://openalex.org/I63169043"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115585580","display_name":"Md. Aktaruzzaman Pramanik","orcid":null},"institutions":[{"id":"https://openalex.org/I63169043","display_name":"United International University","ror":"https://ror.org/01tqv1p28","country_code":"BD","type":"education","lineage":["https://openalex.org/I63169043"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Md. Aktaruzzaman Pramanik","raw_affiliation_strings":["C2SG Research Group, United International University, Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0003-4049-3332","affiliations":[{"raw_affiliation_string":"C2SG Research Group, United International University, Dhaka, Bangladesh","institution_ids":["https://openalex.org/I63169043"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038795848","display_name":"Jannatun Noor","orcid":"https://orcid.org/0000-0001-9669-151X"},"institutions":[{"id":"https://openalex.org/I63169043","display_name":"United International University","ror":"https://ror.org/01tqv1p28","country_code":"BD","type":"education","lineage":["https://openalex.org/I63169043"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Jannatun Noor","raw_affiliation_strings":["C2SG Research Group, United International University, Dhaka, Bangladesh"],"raw_orcid":"https://orcid.org/0000-0001-9669-151X","affiliations":[{"raw_affiliation_string":"C2SG Research Group, United International University, Dhaka, Bangladesh","institution_ids":["https://openalex.org/I63169043"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103049423"],"corresponding_institution_ids":["https://openalex.org/I63169043"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.0776,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.88172075,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"13","issue":null,"first_page":"112428","last_page":"112445"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.8536999821662903,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.8536999821662903,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.7843000292778015,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7031000256538391,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.8773809671401978},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7841264009475708},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.576033353805542},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5615745782852173},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.469598650932312}],"concepts":[{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.8773809671401978},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7841264009475708},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.576033353805542},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5615745782852173},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.469598650932312}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2025.3574234","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3574234","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:digitalcommons.odu.edu:computerscience_fac_pubs-1382","is_oa":true,"landing_page_url":"https://digitalcommons.odu.edu/computerscience_fac_pubs/377","pdf_url":null,"source":{"id":"https://openalex.org/S4377196314","display_name":"ODU Digital Commons (Old Dominion University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I81365321","host_organization_name":"Old Dominion University","host_organization_lineage":["https://openalex.org/I81365321"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computer Science Faculty Publications","raw_type":"article"},{"id":"pmh:oai:doaj.org/article:a728484021574dbe962333dcf0b6af56","is_oa":true,"landing_page_url":"https://doaj.org/article/a728484021574dbe962333dcf0b6af56","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 112428-112445 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3574234","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3574234","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.75,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":112,"referenced_works":["https://openalex.org/W2010845851","https://openalex.org/W2035594617","https://openalex.org/W2071324782","https://openalex.org/W2075436750","https://openalex.org/W2202042777","https://openalex.org/W2279992407","https://openalex.org/W2293804607","https://openalex.org/W2591223967","https://openalex.org/W2591546050","https://openalex.org/W2783557991","https://openalex.org/W2902663225","https://openalex.org/W2902831883","https://openalex.org/W2903269720","https://openalex.org/W2921362618","https://openalex.org/W2978612210","https://openalex.org/W2987972909","https://openalex.org/W2999297255","https://openalex.org/W3005930218","https://openalex.org/W3011860286","https://openalex.org/W3012999699","https://openalex.org/W3013120195","https://openalex.org/W3015243600","https://openalex.org/W3016323721","https://openalex.org/W3016831976","https://openalex.org/W3016888066","https://openalex.org/W3016997976","https://openalex.org/W3023115097","https://openalex.org/W3034196716","https://openalex.org/W3046402235","https://openalex.org/W3091964456","https://openalex.org/W3093600732","https://openalex.org/W3094223050","https://openalex.org/W3111611403","https://openalex.org/W3120141991","https://openalex.org/W3129162509","https://openalex.org/W3137015680","https://openalex.org/W3157706434","https://openalex.org/W3160991089","https://openalex.org/W3162112692","https://openalex.org/W3176969785","https://openalex.org/W3179463712","https://openalex.org/W3180896826","https://openalex.org/W3185582557","https://openalex.org/W3185626188","https://openalex.org/W3188060693","https://openalex.org/W3205211681","https://openalex.org/W3208444381","https://openalex.org/W3211091508","https://openalex.org/W3211341841","https://openalex.org/W3213851456","https://openalex.org/W4200259088","https://openalex.org/W4205395483","https://openalex.org/W4206598708","https://openalex.org/W4210609034","https://openalex.org/W4210784023","https://openalex.org/W4213456769","https://openalex.org/W4214609460","https://openalex.org/W4220850917","https://openalex.org/W4225315139","https://openalex.org/W4226278482","https://openalex.org/W4281689302","https://openalex.org/W4283319526","https://openalex.org/W4289822455","https://openalex.org/W4290714600","https://openalex.org/W4292220009","https://openalex.org/W4306249737","https://openalex.org/W4308343432","https://openalex.org/W4309693150","https://openalex.org/W4309697990","https://openalex.org/W4312119705","https://openalex.org/W4313392635","https://openalex.org/W4320234102","https://openalex.org/W4323059940","https://openalex.org/W4372355086","https://openalex.org/W4377820918","https://openalex.org/W4378470138","https://openalex.org/W4378980172","https://openalex.org/W4385477796","https://openalex.org/W4385574339","https://openalex.org/W4385801334","https://openalex.org/W4386752790","https://openalex.org/W4386953668","https://openalex.org/W4387491969","https://openalex.org/W4388854181","https://openalex.org/W4389182848","https://openalex.org/W4389518323","https://openalex.org/W4389849800","https://openalex.org/W4390667950","https://openalex.org/W4391227277","https://openalex.org/W4391263491","https://openalex.org/W4391407478","https://openalex.org/W4391623165","https://openalex.org/W4391769289","https://openalex.org/W4392188446","https://openalex.org/W4393864172","https://openalex.org/W4395459757","https://openalex.org/W4395462509","https://openalex.org/W4396233313","https://openalex.org/W4399829645","https://openalex.org/W4400929358","https://openalex.org/W4401046976","https://openalex.org/W4401323547","https://openalex.org/W4401386312","https://openalex.org/W4402215748","https://openalex.org/W4402671426","https://openalex.org/W4404067568","https://openalex.org/W4404815086","https://openalex.org/W6790637388","https://openalex.org/W6800601033","https://openalex.org/W6849922490","https://openalex.org/W6862574157","https://openalex.org/W6867829382"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2084490135","https://openalex.org/W4298170800","https://openalex.org/W2895890526","https://openalex.org/W2028401451","https://openalex.org/W2769530083","https://openalex.org/W3161006337","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Research":[0],"in":[1,19,27,59,111,148,158,189],"Bengali":[2,28,90,142,160],"Natural":[3,66],"Language":[4,67],"Processing":[5,68],"(BNLP)":[6],"is":[7],"rapidly":[8],"expanding.":[9],"Despite":[10],"being":[11],"one":[12],"of":[13,78,108,141,186],"the":[14,20,89,135,153,176,184,191],"most":[15],"widely":[16],"spoken":[17],"languages":[18,32],"world,":[21],"BNLP":[22,112],"research":[23,146],"remains":[24],"insufficient,":[25],"particularly":[26],"speech":[29,43,128],"recognition.":[30],"The":[31],"rich":[33],"morphology,":[34],"agglutinative":[35],"structure,":[36],"and":[37,42,61,132,145,162,173],"diverse":[38],"dialects":[39],"make":[40],"text":[41,130,161],"processing":[44,159],"especially":[45],"challenging.":[46],"However,":[47],"these":[48,80],"challenges":[49,154],"can":[50],"be":[51],"addressed":[52],"with":[53],"effective":[54,187],"preprocessing":[55,86,109,168,188],"techniques.":[56],"Various":[57],"organizations":[58],"Bangladesh":[60],"West":[62],"Bengal":[63],"are":[64],"integrating":[65],"(NLP)":[69],"into":[70],"their":[71,171],"services,":[72],"but":[73],"without":[74],"a":[75,95,105],"thorough":[76],"understanding":[77],"preprocessing,":[79],"implementations":[81],"remain":[82],"incomplete.":[83],"Applying":[84],"proper":[85],"techniques":[87,110],"to":[88],"language":[91,143],"will":[92],"serve":[93],"as":[94,122],"foundation":[96],"for":[97,181],"developing":[98],"robust":[99],"NLP":[100],"applications.":[101],"This":[102],"paper":[103,136,177],"presents":[104],"comprehensive":[106],"review":[107],"based":[113],"on":[114],"state-of-the-art":[115],"research.":[116],"It":[117,150],"covers":[118],"key":[119],"areas":[120,147],"such":[121],"sentiment":[123],"analysis,":[124],"Named":[125],"Entity":[126],"Recognition,":[127],"recognition,":[129],"categorization,":[131],"summarization.":[133],"First,":[134],"provides":[137],"an":[138],"in-depth":[139],"discussion":[140],"characteristics":[144],"BNLP.":[149],"then":[151],"explores":[152],"faced":[155],"by":[156],"researchers":[157],"speech.":[163],"Additionally,":[164],"it":[165],"details":[166],"various":[167],"techniques,":[169],"highlighting":[170],"advantages":[172],"disadvantages.":[174],"Finally,":[175],"examines":[178],"future":[179],"directions":[180],"BNLP,":[182],"emphasizing":[183],"role":[185],"advancing":[190],"field.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
