{"id":"https://openalex.org/W1998353461","doi":"https://doi.org/10.1145/2629670","title":"Stemming resource-poor Indian languages","display_name":"Stemming resource-poor Indian languages","publication_year":2014,"publication_date":"2014-10-03","ids":{"openalex":"https://openalex.org/W1998353461","doi":"https://doi.org/10.1145/2629670","mag":"1998353461"},"language":"en","primary_location":{"id":"doi:10.1145/2629670","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2629670","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079799201","display_name":"Navanath Saharia","orcid":"https://orcid.org/0000-0001-8908-9395"},"institutions":[{"id":"https://openalex.org/I126601174","display_name":"Tezpur University","ror":"https://ror.org/005x56091","country_code":"IN","type":"education","lineage":["https://openalex.org/I126601174"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Navanath Saharia","raw_affiliation_strings":["Tezpur University"],"affiliations":[{"raw_affiliation_string":"Tezpur University","institution_ids":["https://openalex.org/I126601174"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109343788","display_name":"Utpal Sharma","orcid":null},"institutions":[{"id":"https://openalex.org/I126601174","display_name":"Tezpur University","ror":"https://ror.org/005x56091","country_code":"IN","type":"education","lineage":["https://openalex.org/I126601174"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Utpal Sharma","raw_affiliation_strings":["Tezpur University"],"affiliations":[{"raw_affiliation_string":"Tezpur University","institution_ids":["https://openalex.org/I126601174"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049180880","display_name":"Jugal Kalita","orcid":"https://orcid.org/0000-0002-8765-7018"},"institutions":[{"id":"https://openalex.org/I888729015","display_name":"University of Colorado Colorado Springs","ror":"https://ror.org/054spjc55","country_code":"US","type":"education","lineage":["https://openalex.org/I888729015"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jugal Kalita","raw_affiliation_strings":["University of Colorado, Colorado Springs",", University of Colorado Colorado Springs#TAB#"],"affiliations":[{"raw_affiliation_string":"University of Colorado, Colorado Springs","institution_ids":["https://openalex.org/I888729015"]},{"raw_affiliation_string":", University of Colorado Colorado Springs#TAB#","institution_ids":["https://openalex.org/I888729015"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5079799201"],"corresponding_institution_ids":["https://openalex.org/I126601174"],"apc_list":null,"apc_paid":null,"fwci":2.045,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.88868597,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"13","issue":"3","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/assamese","display_name":"Assamese","score":0.9907513856887817},{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.9712491035461426},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.729239284992218},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.583805501461029},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5565923452377319},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5261340141296387},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5005619525909424},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.45107242465019226},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.27666935324668884}],"concepts":[{"id":"https://openalex.org/C2777834912","wikidata":"https://www.wikidata.org/wiki/Q29401","display_name":"Assamese","level":2,"score":0.9907513856887817},{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.9712491035461426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.729239284992218},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.583805501461029},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5565923452377319},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5261340141296387},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5005619525909424},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.45107242465019226},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.27666935324668884},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2629670","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2629670","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.5899999737739563}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W26591655","https://openalex.org/W147166030","https://openalex.org/W165283731","https://openalex.org/W240969542","https://openalex.org/W1238390741","https://openalex.org/W1511329540","https://openalex.org/W1517312174","https://openalex.org/W1538602818","https://openalex.org/W1550597138","https://openalex.org/W1562911371","https://openalex.org/W1575194640","https://openalex.org/W1654045153","https://openalex.org/W1819204739","https://openalex.org/W1969158886","https://openalex.org/W1971285215","https://openalex.org/W1974025666","https://openalex.org/W1979076595","https://openalex.org/W1987680958","https://openalex.org/W1991133427","https://openalex.org/W2007976151","https://openalex.org/W2012376027","https://openalex.org/W2013997717","https://openalex.org/W2026227174","https://openalex.org/W2026868490","https://openalex.org/W2031391805","https://openalex.org/W2033937535","https://openalex.org/W2047603832","https://openalex.org/W2054997212","https://openalex.org/W2061801881","https://openalex.org/W2070611574","https://openalex.org/W2084849788","https://openalex.org/W2086790337","https://openalex.org/W2098162425","https://openalex.org/W2117621558","https://openalex.org/W2123591077","https://openalex.org/W2138958299","https://openalex.org/W2143722445","https://openalex.org/W2147880316","https://openalex.org/W2157821910","https://openalex.org/W2164514763","https://openalex.org/W2167301710","https://openalex.org/W2183112185","https://openalex.org/W2252067209","https://openalex.org/W2321947820","https://openalex.org/W2547599276","https://openalex.org/W2607303097","https://openalex.org/W3183153947","https://openalex.org/W4285719527","https://openalex.org/W6601052559"],"related_works":["https://openalex.org/W2969503791","https://openalex.org/W2525649817","https://openalex.org/W567954170","https://openalex.org/W1517593838","https://openalex.org/W2790118738","https://openalex.org/W3162775403","https://openalex.org/W336931407","https://openalex.org/W2533933489","https://openalex.org/W1591809911","https://openalex.org/W2888668458"],"abstract_inverted_index":{"Stemming":[0],"is":[1,44,110,169],"a":[2,45,50,66,77,91],"basic":[3],"method":[4],"for":[5,74,130,137,145,175],"morphological":[6],"normalization":[7],"of":[8,20,68,80,166],"natural":[9],"language":[10],"texts.":[11],"In":[12],"this":[13],"study,":[14],"we":[15,64,93,125],"focus":[16],"on":[17,173,182],"the":[18,101,108,114,127,152],"problem":[19],"stemming":[21,128,174],"several":[22],"resource-poor":[23],"languages":[24,76],"from":[25,56],"Eastern":[26],"India,":[27],"viz.,":[28],"Assamese,":[29,36],"Bengali,":[30],"Bishnupriya":[31,39,146,176],"Manipuri":[32,40,147,177],"and":[33,38,61,139,141,143,148,162,178,184,194],"Bodo.":[34,179],"While":[35],"Bengali":[37,140,185],"are":[41,82],"Indo-Aryan,":[42],"Bodo":[43],"Tibeto-Burman":[46],"language.":[47,132],"We":[48,71,133,155],"design":[49],"rule-based":[51],"approach":[52,98],"to":[53,99],"remove":[54],"suffixes":[55,81],"words.":[57,70],"To":[58],"reduce":[59],"over-stemming":[60],"under-stemming":[62],"errors,":[63],"introduce":[65,94],"dictionary":[67],"frequent":[69],"observe":[72],"that,":[73],"these":[75],"dominant":[78],"amount":[79],"single":[83],"letters":[84],"creating":[85],"problems":[86],"during":[87],"suffix":[88],"stripping.":[89],"As":[90,165],"result,":[92],"an":[95],"HMM-based":[96],"hybrid":[97,153],"classify":[100],"mis-matched":[102],"last":[103],"character.":[104],"For":[105],"each":[106,123,131],"word,":[107],"stem":[109],"extracted":[111],"by":[112],"calculating":[113],"most":[115],"probable":[116],"path":[117],"in":[118],"four":[119],"HMM":[120],"states.":[121],"At":[122],"step":[124],"measure":[126],"accuracy":[129,136],"obtain":[134],"94%":[135],"Assamese":[138,183],"87%,":[142],"82%":[144],"Bodo,":[149],"respectively,":[150],"using":[151],"approach.":[154],"compare":[156],"our":[157],"work":[158,172,192],"with":[159],"Morfessor":[160],"[Creutz":[161],"Lagus":[163],"2005].":[164],"now,":[167],"there":[168],"no":[170],"reported":[171],"Our":[180],"results":[181],"show":[186],"significant":[187],"improvement":[188],"over":[189],"prior":[190],"published":[191],"[Sarkar":[193],"Bandyopadhyay":[195],"2008;":[196],"Sharma":[197],"et":[198],"al.":[199],"2002,":[200],"2003].":[201]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
