{"id":"https://openalex.org/W2060413777","doi":"https://doi.org/10.1145/1838745.1838748","title":"Comparative Study of Indexing and Search Strategies for the Hindi, Marathi, and Bengali Languages","display_name":"Comparative Study of Indexing and Search Strategies for the Hindi, Marathi, and Bengali Languages","publication_year":2010,"publication_date":"2010-09-01","ids":{"openalex":"https://openalex.org/W2060413777","doi":"https://doi.org/10.1145/1838745.1838748","mag":"2060413777"},"language":"en","primary_location":{"id":"doi:10.1145/1838745.1838748","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1838745.1838748","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063528547","display_name":"Ljiljana Dolamic","orcid":"https://orcid.org/0000-0002-0656-5315"},"institutions":[{"id":"https://openalex.org/I57825437","display_name":"University of Neuch\u00e2tel","ror":"https://ror.org/00vasag41","country_code":"CH","type":"education","lineage":["https://openalex.org/I57825437"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Ljiljana Dolamic","raw_affiliation_strings":["University of Neuchatel","University of Neuchatel;"],"affiliations":[{"raw_affiliation_string":"University of Neuchatel","institution_ids":["https://openalex.org/I57825437"]},{"raw_affiliation_string":"University of Neuchatel;","institution_ids":["https://openalex.org/I57825437"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022095958","display_name":"Jacques Savoy","orcid":"https://orcid.org/0000-0002-4486-0067"},"institutions":[{"id":"https://openalex.org/I57825437","display_name":"University of Neuch\u00e2tel","ror":"https://ror.org/00vasag41","country_code":"CH","type":"education","lineage":["https://openalex.org/I57825437"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Jacques Savoy","raw_affiliation_strings":["University of Neuchatel","University of Neuchatel;"],"affiliations":[{"raw_affiliation_string":"University of Neuchatel","institution_ids":["https://openalex.org/I57825437"]},{"raw_affiliation_string":"University of Neuchatel;","institution_ids":["https://openalex.org/I57825437"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5063528547"],"corresponding_institution_ids":["https://openalex.org/I57825437"],"apc_list":null,"apc_paid":null,"fwci":4.6549,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.94670974,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"9","issue":"3","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/marathi","display_name":"Marathi","score":0.8108161687850952},{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.7730270028114319},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7641620635986328},{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.7632861733436584},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.7122982144355774},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6722106337547302},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6282516717910767},{"id":"https://openalex.org/keywords/bilingual-dictionary","display_name":"Bilingual dictionary","score":0.5624529719352722},{"id":"https://openalex.org/keywords/devanagari","display_name":"Devanagari","score":0.4864034950733185},{"id":"https://openalex.org/keywords/vector-space-model","display_name":"Vector space model","score":0.47079625725746155},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.4683973789215088},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.4325029253959656},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4208313226699829},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.19375821948051453}],"concepts":[{"id":"https://openalex.org/C2776844415","wikidata":"https://www.wikidata.org/wiki/Q1571","display_name":"Marathi","level":2,"score":0.8108161687850952},{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.7730270028114319},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7641620635986328},{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.7632861733436584},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.7122982144355774},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6722106337547302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6282516717910767},{"id":"https://openalex.org/C2779235283","wikidata":"https://www.wikidata.org/wiki/Q2640207","display_name":"Bilingual dictionary","level":2,"score":0.5624529719352722},{"id":"https://openalex.org/C2780144916","wikidata":"https://www.wikidata.org/wiki/Q38592","display_name":"Devanagari","level":4,"score":0.4864034950733185},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.47079625725746155},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.4683973789215088},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.4325029253959656},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4208313226699829},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.19375821948051453},{"id":"https://openalex.org/C2987247673","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Character recognition","level":3,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1838745.1838748","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1838745.1838748","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.999.8160","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.999.8160","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://members.unine.ch/jacques.savoy/Papers/HindiTALIP.pdf","raw_type":"text"},{"id":"pmh:oai:doc.rero.ch:20130108094125-KS","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400148","display_name":"reroDoc Digital Library","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8399999737739563,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G8964868216","display_name":null,"funder_award_id":"200021-113273","funder_id":"https://openalex.org/F4320320924","funder_display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung"}],"funders":[{"id":"https://openalex.org/F4320320924","display_name":"Schweizerischer Nationalfonds zur F\u00f6rderung der Wissenschaftlichen Forschung","ror":"https://ror.org/00yjd3n13"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W26591655","https://openalex.org/W581857221","https://openalex.org/W1499624045","https://openalex.org/W1501945993","https://openalex.org/W1521082019","https://openalex.org/W1531925767","https://openalex.org/W1532325895","https://openalex.org/W1542487946","https://openalex.org/W1554385128","https://openalex.org/W1557757161","https://openalex.org/W1589845329","https://openalex.org/W1733898586","https://openalex.org/W1972594981","https://openalex.org/W1974123979","https://openalex.org/W1976398589","https://openalex.org/W1987680958","https://openalex.org/W1998347217","https://openalex.org/W2008495066","https://openalex.org/W2029097226","https://openalex.org/W2029115643","https://openalex.org/W2037140704","https://openalex.org/W2038114184","https://openalex.org/W2045137302","https://openalex.org/W2054364203","https://openalex.org/W2058200372","https://openalex.org/W2070620842","https://openalex.org/W2077046902","https://openalex.org/W2080068076","https://openalex.org/W2091944322","https://openalex.org/W2096630656","https://openalex.org/W2098162425","https://openalex.org/W2102203038","https://openalex.org/W2105157020","https://openalex.org/W2118020653","https://openalex.org/W2123591077","https://openalex.org/W2131133093","https://openalex.org/W2138958299","https://openalex.org/W2146798791","https://openalex.org/W2148398225","https://openalex.org/W2164514763","https://openalex.org/W2591816337","https://openalex.org/W2614257592","https://openalex.org/W2742538188","https://openalex.org/W2768581363","https://openalex.org/W4213009331","https://openalex.org/W4285719527","https://openalex.org/W6601052559","https://openalex.org/W6617013663","https://openalex.org/W6649662243","https://openalex.org/W6725862456"],"related_works":["https://openalex.org/W4309803833","https://openalex.org/W1512381921","https://openalex.org/W2885232723","https://openalex.org/W2170266422","https://openalex.org/W4385502514","https://openalex.org/W2060143251","https://openalex.org/W4297798697","https://openalex.org/W3183678392","https://openalex.org/W3043312179","https://openalex.org/W4285362203"],"abstract_inverted_index":{"The":[0],"main":[1],"goal":[2],"of":[3,62,83,90,118,219,275,297,321],"this":[4,45,217],"article":[5,46],"is":[6,246],"to":[7,98,177,209,267,288,295],"describe":[8],"and":[9,13,20,35,41,65,68,72,96,104,114,140,153,227,261,277,316],"evaluate":[10,105,124],"various":[11,130],"indexing":[12,109,229,285,306,328],"search":[14],"strategies":[15,86],"for":[16,204,241,254,259,263,314],"the":[17,29,59,91,111,119,134,164,171,179,248,255,283,304,312,319],"Hindi,":[18],"Bengali,":[19,264],"Marathi":[21],"languages.":[22],"These":[23],"three":[24,160],"languages":[25,34,50,161],"are":[26,235,252],"ranked":[27],"among":[28],"world\u2019s":[30],"20":[31],"most":[32],"spoken":[33],"they":[36],"share":[37],"similar":[38,294],"syntax,":[39],"morphology,":[40],"writing":[42],"systems.":[43],"In":[44,80],"we":[47,87,102,280],"examine":[48],"these":[49,84,125],"from":[51,137,170,173],"an":[52,243,298],"Information":[53],"Retrieval":[54],"(IR)":[55],"perspective":[56],"through":[57],"describing":[58],"key":[60],"elements":[61],"their":[63],"inflectional":[64],"derivational":[66,206],"morphologies,":[67],"suggest":[69,188],"a":[70,212,268,273,323,326],"light":[71,213],"more":[73,198],"aggressive":[74,199,244,299],"stemming":[75,85,226,228,324],"approach":[76],"based":[77],"on":[78,272],"them.":[79],"our":[81,100,129],"evaluation":[82],"make":[88],"use":[89],"FIRE":[92],"2008":[93],"test":[94],"collections,":[95],"then":[97],"broaden":[99],"comparisons":[101],"implement":[103],"two":[106,147],"language":[107,142],"independent":[108],"methods:":[110],"n":[112,116,121],"-gram":[113],"trunc-":[115],"(truncation":[117],"first":[120],"letters).":[122],"We":[123],"solutions":[126],"by":[127,196],"applying":[128,197,322],"IR":[131],"models,":[132],"including":[133],"Okapi,":[135],"Divergence":[136,172],"Randomness":[138,174],"(DFR)":[139],"statistical":[141],"models":[143],"(LM)":[144],"together":[145],"with":[146,158],"classical":[148],"vector-space":[149],"approaches:":[150],"tf":[151],"idf":[152],"Lnu-ltc":[154],".":[155],"Experiments":[156],"performed":[157],"all":[159],"demonstrate":[162],"that":[163,189,232,282],"I(n":[165],"e":[166],")C2":[167],"model":[168],"derived":[169],"paradigm":[175],"tends":[176,287],"provide":[178],"best":[180],"mean":[181],"average":[182],"precision":[183],"(MAP).":[184],"Our":[185],"own":[186],"tests":[187],"improved":[190],"retrieval":[191],"effectiveness":[192],"would":[193],"be":[194],"obtained":[195,251],"stemmers,":[200],"especially":[201],"those":[202,210,296],"accounting":[203],"certain":[205],"suffixes,":[207],"compared":[208,266],"involving":[211],"stemmer":[214,245],"or":[215,325],"ignoring":[216],"type":[218],"word":[220],"normalization":[221],"procedure.":[222],"Comparisons":[223],"between":[224],"no":[225],"schemes":[230],"shows":[231],"performance":[233,291],"differences":[234],"almost":[236],"always":[237],"statistically":[238,293],"significant.":[239],"When,":[240],"example,":[242],"applied,":[247],"relative":[249],"improvements":[250],"~28%":[253],"Hindi":[256],"language,":[257],"~42%":[258],"Marathi,":[260],"~18%":[262],"as":[265],"no-stemming":[269],"approach.":[270],"Based":[271],"comparison":[274],"word-based":[276],"language-independent":[278],"approaches":[279],"find":[281],"trunc-4":[284,327],"scheme":[286],"result":[289],"in":[290],"levels":[292],"stemmer,":[300],"yet":[301],"better":[302],"than":[303],"4-gram":[305],"scheme.":[307,329],"A":[308],"query-by-query":[309],"analysis":[310],"reveals":[311],"reasons":[313],"this,":[315],"also":[317],"demonstrates":[318],"advantage":[320]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
