{"id":"https://openalex.org/W2157821910","doi":"https://doi.org/10.1145/1390749.1390765","title":"An unsupervised Hindi stemmer with heuristic improvements","display_name":"An unsupervised Hindi stemmer with heuristic improvements","publication_year":2008,"publication_date":"2008-07-24","ids":{"openalex":"https://openalex.org/W2157821910","doi":"https://doi.org/10.1145/1390749.1390765","mag":"2157821910"},"language":"en","primary_location":{"id":"doi:10.1145/1390749.1390765","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1390749.1390765","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the second workshop on Analytics for noisy unstructured text data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004879235","display_name":"Amaresh Kumar Pandey","orcid":null},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Amaresh Kumar Pandey","raw_affiliation_strings":["Indian Institute of Information Technology - Allahabad, Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Information Technology - Allahabad, Allahabad, India","institution_ids":["https://openalex.org/I26072440"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021143956","display_name":"Tanveer J. Siddiqui","orcid":"https://orcid.org/0000-0002-8190-0646"},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Tanveer J Siddiqui","raw_affiliation_strings":["Indian Institute of Information Technology - Allahabad, Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Information Technology - Allahabad, Allahabad, India","institution_ids":["https://openalex.org/I26072440"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5004879235"],"corresponding_institution_ids":["https://openalex.org/I26072440"],"apc_list":null,"apc_paid":null,"fwci":5.7205,"has_fulltext":false,"cited_by_count":53,"citation_normalized_percentile":{"value":0.95856667,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"99","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.9359081983566284},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7923486232757568},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.781543493270874},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6366990804672241},{"id":"https://openalex.org/keywords/root","display_name":"Root (linguistics)","score":0.627322256565094},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6258705258369446},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5878551006317139},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.46818816661834717},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4328671991825104},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39966678619384766},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3688891530036926},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1365204155445099},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.0817432701587677}],"concepts":[{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.9359081983566284},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7923486232757568},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.781543493270874},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6366990804672241},{"id":"https://openalex.org/C171078966","wikidata":"https://www.wikidata.org/wiki/Q111029","display_name":"Root (linguistics)","level":2,"score":0.627322256565094},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6258705258369446},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5878551006317139},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.46818816661834717},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4328671991825104},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39966678619384766},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3688891530036926},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1365204155445099},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0817432701587677},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1390749.1390765","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1390749.1390765","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the second workshop on Analytics for noisy unstructured text data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W26591655","https://openalex.org/W86405024","https://openalex.org/W1983311927","https://openalex.org/W1986909372","https://openalex.org/W1997841190","https://openalex.org/W2007976151","https://openalex.org/W2026551167","https://openalex.org/W2039660096","https://openalex.org/W2066980717","https://openalex.org/W2070554026","https://openalex.org/W2098162425","https://openalex.org/W2101711363","https://openalex.org/W2114986384","https://openalex.org/W2138958299","https://openalex.org/W2150144720","https://openalex.org/W2168965629","https://openalex.org/W3183153947","https://openalex.org/W4251556668"],"related_works":["https://openalex.org/W2384553807","https://openalex.org/W1557888283","https://openalex.org/W3089999372","https://openalex.org/W2265245145","https://openalex.org/W3110423299","https://openalex.org/W4312229285","https://openalex.org/W2738278463","https://openalex.org/W4308939443","https://openalex.org/W2103574067","https://openalex.org/W2393370774"],"abstract_inverted_index":{"Stemmers":[0],"are":[1],"used":[2],"to":[3,17,39,99,125],"convert":[4],"inflected":[5],"words":[6,70,73,89],"into":[7,30],"their":[8],"root":[9,19],"or":[10],"stem.":[11],"Stem":[12],"does":[13,114],"not":[14,115],"necessarily":[15],"correspond":[16],"linguistic":[18],"of":[20,50,58,136,140,143],"a":[21],"word.":[22],"Stemming":[23],"improve":[24],"performance":[25],"by":[26,86],"reducing":[27,141],"morphologically":[28],"variants":[29],"same":[31],"words.":[32,63],"This":[33,44],"paper":[34,45],"presents":[35],"an":[36,51],"approach":[37,59,67],"is":[38],"develop":[40],"unsupervised":[41,52],"Hindi":[42,55,76,146],"stemmer.":[43,173],"focus":[46],"on":[47,68],"the":[48,112,134,172],"development":[49],"stemmer":[53,137,158,162,169],"for":[54,145],"and":[56,160],"evaluation":[57],"using":[60],"manually":[61],"segmented":[62],"We":[64,131],"evaluate":[65,133],"our":[66,168],"1000-1000":[69],"randomly":[71],"extracted":[72,90],"(only)":[74],"from":[75,91],"WordNet1":[77],"data":[78,82],"base.":[79],"The":[80,94,107,150],"training":[81],"has":[83],"been":[84,153],"constructed":[85],"extracting":[87],"106403":[88],"EMILLE2":[92],"corpus.":[93],"observed":[95],"accuracy":[96],"was":[97,109],"found":[98],"be":[100,123],"89.9%":[101],"after":[102],"applying":[103],"some":[104],"heuristic":[105],"measures.":[106],"F-score":[108],"94.96%.":[110],"As":[111],"algorithm":[113],"require":[116],"any":[117],"language":[118],"specific":[119],"information,":[120],"it":[121],"can":[122],"applied":[124],"other":[126],"Indian":[127],"languages":[128],"as":[129],"well.":[130],"also":[132],"effect":[135],"in":[138],"terms":[139],"size":[142],"index":[144],"information":[147],"retrieval":[148],"task.":[149],"results":[151],"have":[152],"compared":[154],"with":[155],"light":[156],"weight":[157],"[10]":[159],"UMass":[161],"[17].":[163],"Test":[164],"run":[165],"shows":[166],"that":[167],"outperforms":[170],"both":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":3},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":5},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":8},{"year":2012,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
