{"id":"https://openalex.org/W2011447283","doi":"https://doi.org/10.1145/2536736.2536738","title":"Effective and Robust Query-Based Stemming","display_name":"Effective and Robust Query-Based Stemming","publication_year":2013,"publication_date":"2013-11-01","ids":{"openalex":"https://openalex.org/W2011447283","doi":"https://doi.org/10.1145/2536736.2536738","mag":"2011447283"},"language":"en","primary_location":{"id":"doi:10.1145/2536736.2536738","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2536736.2536738","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062553068","display_name":"Jiaul H. Paik","orcid":"https://orcid.org/0000-0003-1550-3586"},"institutions":[{"id":"https://openalex.org/I6498739","display_name":"Indian Statistical Institute","ror":"https://ror.org/00q2w1j53","country_code":"IN","type":"education","lineage":["https://openalex.org/I6498739"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Jiaul H. Paik","raw_affiliation_strings":["Indian Statistical Institute, Kolkata","Indian Statistical institute, Kolkata#TAB#"],"affiliations":[{"raw_affiliation_string":"Indian Statistical Institute, Kolkata","institution_ids":["https://openalex.org/I6498739"]},{"raw_affiliation_string":"Indian Statistical institute, Kolkata#TAB#","institution_ids":["https://openalex.org/I6498739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102911608","display_name":"Swapan K. Parui","orcid":"https://orcid.org/0000-0001-6863-5780"},"institutions":[{"id":"https://openalex.org/I6498739","display_name":"Indian Statistical Institute","ror":"https://ror.org/00q2w1j53","country_code":"IN","type":"education","lineage":["https://openalex.org/I6498739"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Swapan K. Parui","raw_affiliation_strings":["Indian Statistical Institute, Kolkata","Indian Statistical institute, Kolkata#TAB#"],"affiliations":[{"raw_affiliation_string":"Indian Statistical Institute, Kolkata","institution_ids":["https://openalex.org/I6498739"]},{"raw_affiliation_string":"Indian Statistical institute, Kolkata#TAB#","institution_ids":["https://openalex.org/I6498739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074943851","display_name":"Dipasree Pal","orcid":null},"institutions":[{"id":"https://openalex.org/I6498739","display_name":"Indian Statistical Institute","ror":"https://ror.org/00q2w1j53","country_code":"IN","type":"education","lineage":["https://openalex.org/I6498739"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Dipasree Pal","raw_affiliation_strings":["Indian Statistical Institute, Kolkata","Indian Statistical institute, Kolkata#TAB#"],"affiliations":[{"raw_affiliation_string":"Indian Statistical Institute, Kolkata","institution_ids":["https://openalex.org/I6498739"]},{"raw_affiliation_string":"Indian Statistical institute, Kolkata#TAB#","institution_ids":["https://openalex.org/I6498739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081925956","display_name":"Stephen Robertson","orcid":"https://orcid.org/0000-0003-4115-6215"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Stephen E. Robertson","raw_affiliation_strings":["Microsoft Research, Cambridge, UK","Microsoft Research, Cambridge, UK;"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Cambridge, UK","institution_ids":["https://openalex.org/I4210164937"]},{"raw_affiliation_string":"Microsoft Research, Cambridge, UK;","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5062553068"],"corresponding_institution_ids":["https://openalex.org/I6498739"],"apc_list":null,"apc_paid":null,"fwci":2.4046,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.89821549,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"31","issue":"4","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9048593640327454},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7013822197914124},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.698968768119812},{"id":"https://openalex.org/keywords/query-expansion","display_name":"Query expansion","score":0.6424105763435364},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5490036010742188},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.48812445998191833},{"id":"https://openalex.org/keywords/web-query-classification","display_name":"Web query classification","score":0.4693050682544708},{"id":"https://openalex.org/keywords/result-set","display_name":"Result set","score":0.4305824637413025},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4027506113052368},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.3903675973415375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3408154845237732},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3269854485988617},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.26686930656433105}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9048593640327454},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7013822197914124},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.698968768119812},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.6424105763435364},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5490036010742188},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.48812445998191833},{"id":"https://openalex.org/C118689300","wikidata":"https://www.wikidata.org/wiki/Q7978614","display_name":"Web query classification","level":4,"score":0.4693050682544708},{"id":"https://openalex.org/C4969071","wikidata":"https://www.wikidata.org/wiki/Q7316353","display_name":"Result set","level":3,"score":0.4305824637413025},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4027506113052368},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.3903675973415375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3408154845237732},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3269854485988617},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.26686930656433105},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2536736.2536738","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2536736.2536738","pdf_url":null,"source":{"id":"https://openalex.org/S4394735545","display_name":"ACM Transactions on Information Systems","issn_l":"1046-8188","issn":["1046-8188","1558-2868"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Information Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/1","display_name":"No poverty"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W26591655","https://openalex.org/W1507711745","https://openalex.org/W1574901103","https://openalex.org/W1593045043","https://openalex.org/W1654045153","https://openalex.org/W1727944201","https://openalex.org/W1968951234","https://openalex.org/W1979076595","https://openalex.org/W1990190154","https://openalex.org/W2002306339","https://openalex.org/W2002639244","https://openalex.org/W2008495066","https://openalex.org/W2013997717","https://openalex.org/W2026227174","https://openalex.org/W2032039936","https://openalex.org/W2036181041","https://openalex.org/W2038114184","https://openalex.org/W2049453310","https://openalex.org/W2054364203","https://openalex.org/W2058200372","https://openalex.org/W2060413777","https://openalex.org/W2068143774","https://openalex.org/W2073193210","https://openalex.org/W2077948748","https://openalex.org/W2098162425","https://openalex.org/W2101711363","https://openalex.org/W2116211107","https://openalex.org/W2123591077","https://openalex.org/W2138958299","https://openalex.org/W2144115476","https://openalex.org/W6605428294"],"related_works":["https://openalex.org/W2096359267","https://openalex.org/W2026738364","https://openalex.org/W2901901036","https://openalex.org/W2572349046","https://openalex.org/W2013069866","https://openalex.org/W8514837","https://openalex.org/W1793997780","https://openalex.org/W2146885082","https://openalex.org/W3125756434","https://openalex.org/W3049728138"],"abstract_inverted_index":{"Stemming":[0],"is":[1,29,65,80],"a":[2],"widely":[3],"used":[4,27],"technique":[5],"in":[6],"information":[7],"retrieval":[8,62],"systems":[9],"to":[10,51,81],"address":[11,89],"the":[12,25,33,37,46,72,112,124,137,140,149],"vocabulary":[13],"mismatch":[14],"problem":[15],"arising":[16],"out":[17],"of":[18,24,36,77,94,123,128,139],"morphological":[19,34],"phenomena.":[20],"The":[21,74],"major":[22],"shortcoming":[23],"commonly":[26],"stemmers":[28,127],"that":[30,64,111,136],"they":[31],"accept":[32],"variants":[35],"query":[38],"words":[39],"without":[40],"considering":[41],"their":[42],"thematic":[43],"coherence":[44],"with":[45],"given":[47],"query,":[48],"which":[49,88],"leads":[50],"poor":[52],"performance.":[53],"Moreover,":[54],"for":[55],"many":[56],"queries,":[57],"such":[58],"approaches":[59],"also":[60,134],"produce":[61],"performance":[63],"poorer":[66],"than":[67,148],"no":[68],"stemming,":[69],"thereby":[70],"degrading":[71],"robustness.":[73],"main":[75],"goal":[76],"this":[78],"article":[79],"present":[82],"corpus-based":[83],"fully":[84],"automatic":[85],"stemming":[86,115,143],"algorithms":[87,116,144],"these":[90],"issues.":[91],"A":[92],"set":[93],"experiments":[95,133],"on":[96],"six":[97],"TREC":[98],"collections":[99,104],"and":[100,107,118],"three":[101],"other":[102],"non-English":[103],"containing":[105],"news":[106],"web":[108],"documents":[109],"shows":[110],"proposed":[113,141],"query-based":[114,142],"consistently":[117],"significantly":[119],"outperform":[120],"four":[121],"state":[122],"art":[125],"strong":[126,151],"completely":[129],"varying":[130],"principles.":[131],"Our":[132],"confirm":[135],"robustness":[138],"are":[145],"remarkably":[146],"better":[147],"existing":[150],"baselines.":[152]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
