{"id":"https://openalex.org/W2625992059","doi":"https://doi.org/10.1109/aiccsa.2016.7945743","title":"Efficient inverted index with n-gram sampling for string matching in Arabic documents","display_name":"Efficient inverted index with n-gram sampling for string matching in Arabic documents","publication_year":2016,"publication_date":"2016-11-01","ids":{"openalex":"https://openalex.org/W2625992059","doi":"https://doi.org/10.1109/aiccsa.2016.7945743","mag":"2625992059"},"language":"en","primary_location":{"id":"doi:10.1109/aiccsa.2016.7945743","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiccsa.2016.7945743","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE/ACS 13th International Conference of Computer Systems and Applications (AICCSA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041553790","display_name":"El Moatez Billah Nagoudi","orcid":null},"institutions":[{"id":"https://openalex.org/I50219554","display_name":"University of Laghouat","ror":"https://ror.org/018bbh535","country_code":"DZ","type":"education","lineage":["https://openalex.org/I50219554"]}],"countries":["DZ"],"is_corresponding":true,"raw_author_name":"El Moatez Billah Nagoudi","raw_affiliation_strings":["Universite Amar Telidji Laghouat, Laghouat, DZ"],"affiliations":[{"raw_affiliation_string":"Universite Amar Telidji Laghouat, Laghouat, DZ","institution_ids":["https://openalex.org/I50219554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018412827","display_name":"Ahmed Khorsi","orcid":null},"institutions":[{"id":"https://openalex.org/I240666556","display_name":"Imam Mohammad ibn Saud Islamic University","ror":"https://ror.org/05gxjyb39","country_code":"SA","type":"education","lineage":["https://openalex.org/I240666556"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Ahmed Khorsi","raw_affiliation_strings":["Al-Imam Mohammad Ibn Saud, Islamic University, Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"Al-Imam Mohammad Ibn Saud, Islamic University, Saudi Arabia","institution_ids":["https://openalex.org/I240666556"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046969653","display_name":"Hadda Cherroun","orcid":"https://orcid.org/0000-0002-5117-0320"},"institutions":[{"id":"https://openalex.org/I50219554","display_name":"University of Laghouat","ror":"https://ror.org/018bbh535","country_code":"DZ","type":"education","lineage":["https://openalex.org/I50219554"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Hadda Cherroun","raw_affiliation_strings":["Laboratoire d'Informatique et de, Universit\u00e9 Amar Telidji de Laghouat, Alg\u00e9rie"],"affiliations":[{"raw_affiliation_string":"Laboratoire d'Informatique et de, Universit\u00e9 Amar Telidji de Laghouat, Alg\u00e9rie","institution_ids":["https://openalex.org/I50219554"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041553790"],"corresponding_institution_ids":["https://openalex.org/I50219554"],"apc_list":null,"apc_paid":null,"fwci":0.4285,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.81395212,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9883999824523926,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inverted-index","display_name":"Inverted index","score":0.7840002775192261},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.767292320728302},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.6464880704879761},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5376324653625488},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5295743942260742},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.5215489864349365},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.4947647452354431},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.4929642677307129},{"id":"https://openalex.org/keywords/substring","display_name":"Substring","score":0.49216336011886597},{"id":"https://openalex.org/keywords/gram","display_name":"Gram","score":0.49201151728630066},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4648750126361847},{"id":"https://openalex.org/keywords/full-text-search","display_name":"Full text search","score":0.41133877635002136},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3768896162509918},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3477073311805725},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.28767988085746765},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2543434500694275},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.19761478900909424},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.19050154089927673},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17822784185409546},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.1727612018585205},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1583569347858429}],"concepts":[{"id":"https://openalex.org/C130590232","wikidata":"https://www.wikidata.org/wiki/Q1671754","display_name":"Inverted index","level":3,"score":0.7840002775192261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.767292320728302},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.6464880704879761},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5376324653625488},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5295743942260742},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.5215489864349365},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.4947647452354431},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.4929642677307129},{"id":"https://openalex.org/C182407805","wikidata":"https://www.wikidata.org/wiki/Q2626534","display_name":"Substring","level":3,"score":0.49216336011886597},{"id":"https://openalex.org/C161369605","wikidata":"https://www.wikidata.org/wiki/Q41803","display_name":"Gram","level":3,"score":0.49201151728630066},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4648750126361847},{"id":"https://openalex.org/C20228898","wikidata":"https://www.wikidata.org/wiki/Q83540","display_name":"Full text search","level":3,"score":0.41133877635002136},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3768896162509918},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3477073311805725},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.28767988085746765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2543434500694275},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.19761478900909424},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.19050154089927673},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17822784185409546},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.1727612018585205},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1583569347858429},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C523546767","wikidata":"https://www.wikidata.org/wiki/Q10876","display_name":"Bacteria","level":2,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/aiccsa.2016.7945743","is_oa":false,"landing_page_url":"https://doi.org/10.1109/aiccsa.2016.7945743","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE/ACS 13th International Conference of Computer Systems and Applications (AICCSA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W125979907","https://openalex.org/W142212369","https://openalex.org/W1484435918","https://openalex.org/W1529205966","https://openalex.org/W1532280479","https://openalex.org/W1532325895","https://openalex.org/W1575736368","https://openalex.org/W1986106281","https://openalex.org/W2002562485","https://openalex.org/W2041445031","https://openalex.org/W2068143774","https://openalex.org/W2068589160","https://openalex.org/W2088386938","https://openalex.org/W2107293766","https://openalex.org/W2108706843","https://openalex.org/W2109262879","https://openalex.org/W2121252285","https://openalex.org/W2122282348","https://openalex.org/W2130564474","https://openalex.org/W2153611381","https://openalex.org/W2158874082","https://openalex.org/W2170334586","https://openalex.org/W2596773272","https://openalex.org/W3003506064","https://openalex.org/W4242909782","https://openalex.org/W6605088862"],"related_works":["https://openalex.org/W2906970013","https://openalex.org/W3126081632","https://openalex.org/W2149530645","https://openalex.org/W2088254117","https://openalex.org/W2141389247","https://openalex.org/W4254593385","https://openalex.org/W2399005400","https://openalex.org/W2790582133","https://openalex.org/W2184716188","https://openalex.org/W2133946991"],"abstract_inverted_index":{"Text":[0],"search":[1,35,98],"is":[2,11,37,75,189],"the":[3,30,33,41,47,78,93,97,104,117,119,123,134,182,203,218],"basis":[4],"of":[5,32,81,83,167,184,202],"countless":[6],"applications":[7],"and":[8,15,23,85,96,108,128,133,172],"techniques.":[9],"It":[10],"constrained":[12],"by":[13,40,140,191],"space":[14],"time":[16,132],"resource":[17],"limitations":[18],"inherent":[19],"in":[20,87,144,158],"different":[21,153],"contexts":[22],"scenarios.":[24],"A":[25],"common":[26],"approach":[27,150],"to":[28,38,46,67,76,91,116,121,152,193,209],"minimize":[29],"cost":[31],"general":[34],"task":[36],"start":[39],"characteristics":[42],"which":[43],"are":[44,161],"particular":[45],"targeted":[48],"entity.":[49],"In":[50],"this":[51,159],"paper,":[52],"we":[53,101],"propose":[54],"an":[55,164],"approximative":[56],"index-based":[57],"text":[58],"searching":[59],"algorithm":[60],"that":[61,181],"performances":[62],"can":[63,113,137],"be":[64,138,213],"customized":[65],"respect":[66],"both":[68],"time/memory":[69],"user":[70,118],"constraints.":[71],"The":[72,155],"main":[73],"idea":[74],"exploit":[77],"uneven":[79],"distribution":[80],"frequencies":[82],"letters":[84,107],"n-grams":[86],"natural":[88],"language":[89],"text,":[90],"reduce":[92],"index":[94,126,135,188],"size":[95,127,136,183],"time,":[99],"where":[100],"store":[102],"only":[103],"less":[105],"frequent":[106],"n-grams.":[109],"Moreover,":[110],"our":[111,145,149,185],"technique":[112],"also":[114],"provide":[115],"flexibility":[120],"choose":[122],"tradeoff":[124],"between":[125],"query":[129],"performance.":[130],"Search":[131],"balanced":[139],"varying":[141],"three":[142],"parameters":[143],"approach.":[146],"This":[147],"makes":[148],"flexible":[151],"settings.":[154],"tests":[156],"described":[157],"paper":[160],"driven":[162],"on":[163],"Arabic":[165],"collection":[166],"more":[168,173],"than":[169,174],"450":[170],"documents":[171],"20":[175],"million":[176],"words.":[177],"Experimental":[178],"results":[179],"show":[180],"n-gram":[186,205],"inverted":[187,206],"reduced":[190],"up":[192],"40%\u201385%":[194],"(with":[195],"tolerable":[196],"performance":[197],"penalties)":[198],"compared":[199],"with":[200],"those":[201],"full":[204],"index.":[207],"Generalization":[208],"other":[210],"languages":[211],"should":[212],"straightforward":[214],"as":[215,217],"long":[216],"underlying":[219],"statistical":[220],"property":[221],"applies.":[222]},"counts_by_year":[{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
