{"id":"https://openalex.org/W1997783092","doi":"https://doi.org/10.1145/1838745.1838749","title":"Sub-Word Indexing and Blind Relevance Feedback for English, Bengali, Hindi, and Marathi IR","display_name":"Sub-Word Indexing and Blind Relevance Feedback for English, Bengali, Hindi, and Marathi IR","publication_year":2010,"publication_date":"2010-09-01","ids":{"openalex":"https://openalex.org/W1997783092","doi":"https://doi.org/10.1145/1838745.1838749","mag":"1997783092"},"language":"en","primary_location":{"id":"doi:10.1145/1838745.1838749","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1838745.1838749","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://doras.dcu.ie/16033/","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063045515","display_name":"Johannes Leveling","orcid":"https://orcid.org/0000-0003-0603-4191"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Johannes Leveling","raw_affiliation_strings":["Dublin City University","Dublin City University#TAB#"],"affiliations":[{"raw_affiliation_string":"Dublin City University","institution_ids":["https://openalex.org/I42934936"]},{"raw_affiliation_string":"Dublin City University#TAB#","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018481328","display_name":"Gareth J. F. Jones","orcid":"https://orcid.org/0000-0003-2923-8365"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Gareth J. F. Jones","raw_affiliation_strings":["Dublin City University","Dublin City University#TAB#"],"affiliations":[{"raw_affiliation_string":"Dublin City University","institution_ids":["https://openalex.org/I42934936"]},{"raw_affiliation_string":"Dublin City University#TAB#","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5063045515"],"corresponding_institution_ids":["https://openalex.org/I42934936"],"apc_list":null,"apc_paid":null,"fwci":2.8593,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.92555889,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"9","issue":"3","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/marathi","display_name":"Marathi","score":0.941531777381897},{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.8594465255737305},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.810372531414032},{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.7635306119918823},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.6809275150299072},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6623186469078064},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6609686613082886},{"id":"https://openalex.org/keywords/relevance-feedback","display_name":"Relevance feedback","score":0.635700523853302},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.6165181398391724},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.561510443687439},{"id":"https://openalex.org/keywords/vector-space-model","display_name":"Vector space model","score":0.5238382816314697},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.47869589924812317},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.41840726137161255},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.4130600094795227},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.1892288625240326},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1572112739086151}],"concepts":[{"id":"https://openalex.org/C2776844415","wikidata":"https://www.wikidata.org/wiki/Q1571","display_name":"Marathi","level":2,"score":0.941531777381897},{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.8594465255737305},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.810372531414032},{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.7635306119918823},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.6809275150299072},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6623186469078064},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6609686613082886},{"id":"https://openalex.org/C2779532271","wikidata":"https://www.wikidata.org/wiki/Q445558","display_name":"Relevance feedback","level":4,"score":0.635700523853302},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.6165181398391724},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.561510443687439},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.5238382816314697},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47869589924812317},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.41840726137161255},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.4130600094795227},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.1892288625240326},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1572112739086151},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1838745.1838749","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1838745.1838749","pdf_url":null,"source":{"id":"https://openalex.org/S56575750","display_name":"ACM Transactions on Asian Language Information Processing","issn_l":"1530-0226","issn":["1530-0226","1558-3430"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Asian Language Information Processing","raw_type":"journal-article"},{"id":"pmh:oai:doras.dcu.ie:16033","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401510","display_name":"Dublin City University Open Access Institutional Repository (Dublin City University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I42934936","host_organization_name":"Dublin City University","host_organization_lineage":["https://openalex.org/I42934936"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Conference or Workshop Item"},{"id":"pmh:http://www.rian.ie/48085/","is_oa":true,"landing_page_url":"http://doras.dcu.ie/16033/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400033","display_name":"Arrow@dit (Dublin Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I115570527","host_organization_name":"Dublin Institute of Technology","host_organization_lineage":["https://openalex.org/I115570527"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Leveling, Johannes ORCID: 0000-0003-0603-4191 &lt;https://orcid.org/0000-0003-0603-4191&gt; and Jones, Gareth J.F. ORCID: 0000-0003-2923-8365 &lt;https://orcid.org/0000-0003-2923-8365&gt;  (2010) Sub-word indexing and blind relevance feedback for English, Bengali, Hindi, and Marathi IR.  In: ACM Transactions on Asian Language Information Processing (TALIP) 9.","raw_type":"Other"}],"best_oa_location":{"id":"pmh:http://www.rian.ie/48085/","is_oa":true,"landing_page_url":"http://doras.dcu.ie/16033/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400033","display_name":"Arrow@dit (Dublin Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I115570527","host_organization_name":"Dublin Institute of Technology","host_organization_lineage":["https://openalex.org/I115570527"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Leveling, Johannes ORCID: 0000-0003-0603-4191 &lt;https://orcid.org/0000-0003-0603-4191&gt; and Jones, Gareth J.F. ORCID: 0000-0003-2923-8365 &lt;https://orcid.org/0000-0003-2923-8365&gt;  (2010) Sub-word indexing and blind relevance feedback for English, Bengali, Hindi, and Marathi IR.  In: ACM Transactions on Asian Language Information Processing (TALIP) 9.","raw_type":"Other"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G196017563","display_name":null,"funder_award_id":"Grant 07/CE/I1142","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W26591655","https://openalex.org/W95284390","https://openalex.org/W101793699","https://openalex.org/W152176684","https://openalex.org/W166721740","https://openalex.org/W311963822","https://openalex.org/W1482214997","https://openalex.org/W1492906232","https://openalex.org/W1509167927","https://openalex.org/W1517312174","https://openalex.org/W1524281572","https://openalex.org/W1525341925","https://openalex.org/W1559915978","https://openalex.org/W1654045153","https://openalex.org/W1819162672","https://openalex.org/W1965282483","https://openalex.org/W1978022086","https://openalex.org/W1979076595","https://openalex.org/W1979459060","https://openalex.org/W1986909372","https://openalex.org/W2000635479","https://openalex.org/W2002157036","https://openalex.org/W2008495066","https://openalex.org/W2020647130","https://openalex.org/W2028546017","https://openalex.org/W2038114184","https://openalex.org/W2043909051","https://openalex.org/W2050661945","https://openalex.org/W2054364203","https://openalex.org/W2058200372","https://openalex.org/W2065096648","https://openalex.org/W2076921108","https://openalex.org/W2084849788","https://openalex.org/W2095277595","https://openalex.org/W2098162425","https://openalex.org/W2100259670","https://openalex.org/W2101711363","https://openalex.org/W2105981469","https://openalex.org/W2118059010","https://openalex.org/W2122661071","https://openalex.org/W2135922393","https://openalex.org/W2138798101","https://openalex.org/W2138958299","https://openalex.org/W2153252192","https://openalex.org/W2164547069","https://openalex.org/W2168965629","https://openalex.org/W2186490579","https://openalex.org/W2188734732","https://openalex.org/W2305803660","https://openalex.org/W2442629089","https://openalex.org/W2759336060","https://openalex.org/W2768581363","https://openalex.org/W4230402390","https://openalex.org/W4231856373","https://openalex.org/W6601052559"],"related_works":["https://openalex.org/W1600697429","https://openalex.org/W2156506451","https://openalex.org/W199666437","https://openalex.org/W2153892331","https://openalex.org/W193554","https://openalex.org/W2364053392","https://openalex.org/W373949808","https://openalex.org/W2369483736","https://openalex.org/W2063134459","https://openalex.org/W1493413336"],"abstract_inverted_index":{"The":[0,109,113,296],"Forum":[1],"for":[2,14,98,175,205,217,228,249,253,279,357],"Information":[3],"Retrieval":[4],"Evaluation":[5],"(FIRE)":[6],"provides":[7],"document":[8,107],"collections,":[9],"topics,":[10],"and":[11,45,55,65,95,105,125,147,177,185,219,227,245,251,256,284],"relevance":[12,61,165,240,302,327],"assessments":[13],"information":[15,346],"retrieval":[16,89,298,318,347],"(IR)":[17],"experiments":[18,83,299],"on":[19,63,91,304,319,331],"Indian":[20],"languages.":[21,207,359],"Several":[22],"research":[23],"questions":[24],"are":[25,50,84],"explored":[26],"in":[27,127,194,236,273,315],"this":[28],"article:":[29],"1)":[30],"How":[31,41,57],"to":[32,42,58,196,338,350],"create":[33,34],"a":[35,120,266,280,309,352],"simple,":[36],"language-independent":[37],"corpus-based":[38,114,137],"stemmer,":[39],"2)":[40],"identify":[43],"sub-words":[44,49,64,305],"which":[46],"types":[47],"of":[48,75,130,153,269,288,312,326,334,355],"suitable":[51],"as":[52,119,141,143,182],"indexing":[53,77,154,210],"units,":[54],"3)":[56],"apply":[59],"blind":[60,164,239],"feedback":[62,67,241,303,328],"how":[66],"term":[68,122],"selection":[69],"is":[70,117,200,265],"affected":[71],"by":[72],"the":[73,76,87,92,99,128,136,144,151,173,212,224,232,260,286,324,332],"type":[74],"unit.":[78],"More":[79],"than":[80,150,172],"140":[81],"IR":[82,258],"conducted":[85],"using":[86,211,242,351],"BM25":[88],"model":[90],"topic":[93],"titles":[94],"descriptions":[96],"(TD)":[97],"FIRE":[100],"2008":[101],"English,":[102,135,209],"Bengali,":[103,254],"Hindi,":[104,229,255],"Marathi":[106,178,257],"collections.":[108],"major":[110],"findings":[111],"are:":[112],"stemming":[115],"approach":[116],"effective":[118],"knowledge-light":[121],"conflation":[123],"step":[124],"useful":[126],"case":[129,268],"few":[131],"language-specific":[132],"resources.":[133],"For":[134,208],"stemmer":[138,146,214],"performs":[139,169,215],"nearly":[140],"well":[142],"Porter":[145,213],"significantly":[148,170],"better":[149,171,192],"baseline":[152,174],"words":[155],"when":[156],"combined":[157],"with":[158,163,238,317],"query":[159],"expansion.":[160],"In":[161],"combination":[162,237],"feedback,":[166],"it":[167],"also":[168],"Bengali":[176,218],"IR.":[179],"Sub-words":[180],"such":[181],"consonant-vowel":[183],"sequences":[184],"word":[186,197,282,320,335],"prefixes":[187],"can":[188],"yield":[189,231,259],"similar":[190],"or":[191,275],"performance":[193],"comparison":[195,316],"indexing.":[198],"There":[199],"no":[201],"best":[202,225],"performing":[203],"method":[204],"all":[206],"best,":[216],"Marathi,":[220],"overlapping":[221],"3-grams":[222],"obtain":[223],"result,":[226],"4-prefixes":[230,252],"highest":[233,261],"MAP.":[234,262],"However,":[235],"10":[243],"documents":[244],"20":[246],"terms,":[247],"6-prefixes":[248],"English":[250],"Sub-word":[263],"identification":[264],"general":[267],"decompounding.":[270],"It":[271],"results":[272],"one":[274],"more":[276],"index":[277,289,313],"terms":[278,290,314,329,356],"single":[281],"form":[283],"increases":[285,345],"number":[287,311,325,354],"but":[291],"decreases":[292],"their":[293],"average":[294],"length.":[295],"corresponding":[297],"show":[300],"that":[301],"benefits":[306],"from":[307],"selecting":[308,323],"larger":[310],"forms.":[321],"Similarly,":[322],"depending":[330],"ratio":[333],"vocabulary":[336,340],"size":[337,341],"sub-word":[339],"almost":[342],"always":[343],"slightly":[344],"effectiveness":[348],"compared":[349],"fixed":[353],"different":[358]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
