{"id":"https://openalex.org/W2139237335","doi":"https://doi.org/10.1177/0165551514530655","title":"Automatic identification of light stop words for Persian information retrieval systems","display_name":"Automatic identification of light stop words for Persian information retrieval systems","publication_year":2014,"publication_date":"2014-04-11","ids":{"openalex":"https://openalex.org/W2139237335","doi":"https://doi.org/10.1177/0165551514530655","mag":"2139237335"},"language":"en","primary_location":{"id":"doi:10.1177/0165551514530655","is_oa":false,"landing_page_url":"https://doi.org/10.1177/0165551514530655","pdf_url":null,"source":{"id":"https://openalex.org/S68913162","display_name":"Journal of Information Science","issn_l":"0165-5515","issn":["0165-5515","1741-6485"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Information Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100658844","display_name":"Mohammad Amin Sadeghi","orcid":"https://orcid.org/0000-0002-6409-7679"},"institutions":[{"id":"https://openalex.org/I108103353","display_name":"Universidad de Valladolid","ror":"https://ror.org/01fvbaw18","country_code":"ES","type":"education","lineage":["https://openalex.org/I108103353"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Mohammad Sadeghi","raw_affiliation_strings":["Computer Science Department, University of Valladolid, Spain"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Valladolid, Spain","institution_ids":["https://openalex.org/I108103353"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056869468","display_name":"Jes\u00fas Vegas","orcid":"https://orcid.org/0000-0001-6568-4958"},"institutions":[{"id":"https://openalex.org/I108103353","display_name":"Universidad de Valladolid","ror":"https://ror.org/01fvbaw18","country_code":"ES","type":"education","lineage":["https://openalex.org/I108103353"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jes\u00fas Vegas","raw_affiliation_strings":["Computer Science Department, University of Valladolid, Spain"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Valladolid, Spain","institution_ids":["https://openalex.org/I108103353"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100658844"],"corresponding_institution_ids":["https://openalex.org/I108103353"],"apc_list":null,"apc_paid":null,"fwci":3.9446,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.94271683,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"40","issue":"4","first_page":"476","last_page":"487"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stop-words","display_name":"Stop words","score":0.8443467020988464},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7674099206924438},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.702205240726471},{"id":"https://openalex.org/keywords/persian","display_name":"Persian","score":0.6876082420349121},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6560038924217224},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6161462068557739},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.6034033894538879},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5625513195991516},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5457389950752258},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5137404203414917},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5125124454498291},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5028735995292664},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1548779010772705},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.14547184109687805},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10366231203079224}],"concepts":[{"id":"https://openalex.org/C188338183","wikidata":"https://www.wikidata.org/wiki/Q80735","display_name":"Stop words","level":3,"score":0.8443467020988464},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7674099206924438},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.702205240726471},{"id":"https://openalex.org/C2776527531","wikidata":"https://www.wikidata.org/wiki/Q9168","display_name":"Persian","level":2,"score":0.6876082420349121},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6560038924217224},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6161462068557739},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.6034033894538879},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5625513195991516},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5457389950752258},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5137404203414917},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5125124454498291},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5028735995292664},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1548779010772705},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.14547184109687805},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10366231203079224},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/0165551514530655","is_oa":false,"landing_page_url":"https://doi.org/10.1177/0165551514530655","pdf_url":null,"source":{"id":"https://openalex.org/S68913162","display_name":"Journal of Information Science","issn_l":"0165-5515","issn":["0165-5515","1741-6485"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Information Science","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1483126227","https://openalex.org/W1985697096","https://openalex.org/W1995875735","https://openalex.org/W2017580301","https://openalex.org/W2026306693","https://openalex.org/W2038367926","https://openalex.org/W2043909051","https://openalex.org/W2074870484","https://openalex.org/W2110474840","https://openalex.org/W2121167884","https://openalex.org/W2123652358","https://openalex.org/W2130428585","https://openalex.org/W2330587814","https://openalex.org/W2621280964","https://openalex.org/W2912129191","https://openalex.org/W3125493437","https://openalex.org/W3142110574","https://openalex.org/W4249352285"],"related_works":["https://openalex.org/W4232962587","https://openalex.org/W2339787954","https://openalex.org/W2258261728","https://openalex.org/W2330233494","https://openalex.org/W1501405543","https://openalex.org/W1888026538","https://openalex.org/W2890674960","https://openalex.org/W4207067687","https://openalex.org/W2886416464","https://openalex.org/W2359738357"],"abstract_inverted_index":{"Stop":[0,19],"word":[1,104,122],"identification":[2],"is":[3,110],"one":[4],"of":[5,144,160,183,190],"the":[6,36,41,88,116,129,147,158,164,176,181,188],"most":[7],"important":[8],"tasks":[9],"for":[10],"many":[11],"text":[12],"processing":[13],"applications":[14],"such":[15],"as":[16,47,56,58,101],"information":[17,39,63,84],"retrieval.":[18],"words":[20,44,91,152,170,185],"occur":[21],"too":[22],"frequently":[23],"in":[24,26,157],"documents":[25],"a":[27,97,102,112,119,137,141,154,172],"collection":[28],"and":[29,50,83,109,149,153,180],"do":[30],"not":[31,111],"contribute":[32],"significantly":[33],"to":[34,86],"determining":[35],"context":[37],"or":[38],"about":[40,194],"documents.":[42],"These":[43],"are":[45],"worthless":[46],"index":[48,161,177,191],"terms":[49,192],"should":[51],"be":[52,125],"removed":[53],"during":[54],"indexing":[55],"well":[57],"before":[59],"querying":[60],"by":[61,127,193],"an":[62,71],"retrieval":[64],"system.":[65],"In":[66,115],"this":[67],"paper,":[68],"we":[69],"propose":[70],"automatic":[72],"aggregated":[73],"methodology":[74],"based":[75],"on":[76,175],"term":[77],"frequency,":[78],"normalized":[79],"inverse":[80],"document":[81],"frequency":[82],"model":[85],"extract":[87],"light":[89,130,168],"stop":[90,99,103,121,131,151,169,184],"from":[92],"Persian":[93,117,148,167],"text.":[94],"We":[95],"define":[96],"\u2018light":[98],"word\u2019":[100],"that":[105],"has":[106],"few":[107],"letters":[108],"compound":[113],"word.":[114],"language,":[118],"complete":[120],"list":[123],"can":[124,186],"derived":[126],"combining":[128],"words.":[132],"The":[133],"evaluation":[134],"results,":[135],"using":[136],"standard":[138],"corpus,":[139],"show":[140],"good":[142],"percentage":[143],"coincidence":[145],"between":[146],"English":[150],"significant":[155],"improvement":[156],"number":[159,189],"terms.":[162],"Specifically,":[163],"first":[165],"32":[166],"have":[171],"great":[173],"impact":[174],"size":[178],"reduction":[179],"set":[182],"reduce":[187],"27%.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
