{"id":"https://openalex.org/W3200390843","doi":"https://doi.org/10.1109/tkde.2021.3114401","title":"Practical High-Order Entropy-Compressed Text Self-Indexing","display_name":"Practical High-Order Entropy-Compressed Text Self-Indexing","publication_year":2021,"publication_date":"2021-09-22","ids":{"openalex":"https://openalex.org/W3200390843","doi":"https://doi.org/10.1109/tkde.2021.3114401","mag":"3200390843"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2021.3114401","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2021.3114401","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072102175","display_name":"Hongwei Huo","orcid":"https://orcid.org/0000-0002-5436-1851"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongwei Huo","raw_affiliation_strings":["Computer Science and Technology, Xidian University, Xi&#x0027;an, Shaanxi, China"],"raw_orcid":"https://orcid.org/0000-0002-5436-1851","affiliations":[{"raw_affiliation_string":"Computer Science and Technology, Xidian University, Xi&#x0027;an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032990215","display_name":"Long Peng","orcid":"https://orcid.org/0000-0002-8345-6278"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Long","raw_affiliation_strings":["Xidian University, Xi&#x0027;an, Shaanxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xidian University, Xi&#x0027;an, Shaanxi, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003340402","display_name":"Jeffrey Scott Vitter","orcid":"https://orcid.org/0000-0001-7970-6118"},"institutions":[{"id":"https://openalex.org/I368840534","display_name":"University of Mississippi","ror":"https://ror.org/02teq1165","country_code":"US","type":"education","lineage":["https://openalex.org/I368840534","https://openalex.org/I4210141039"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeffrey Scott Vitter","raw_affiliation_strings":["University of Mississippi, Oxford, MS, USA"],"raw_orcid":"https://orcid.org/0000-0001-7970-6118","affiliations":[{"raw_affiliation_string":"University of Mississippi, Oxford, MS, USA","institution_ids":["https://openalex.org/I368840534"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6997,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.76636676,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"35","issue":"3","first_page":"2943","last_page":"2960"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8201067447662354},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.7628373503684998},{"id":"https://openalex.org/keywords/compressed-suffix-array","display_name":"Compressed suffix array","score":0.630513608455658},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5504305958747864},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5238925218582153},{"id":"https://openalex.org/keywords/inverted-index","display_name":"Inverted index","score":0.5027623176574707},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.45040163397789},{"id":"https://openalex.org/keywords/suffix-tree","display_name":"Suffix tree","score":0.41211143136024475},{"id":"https://openalex.org/keywords/pattern-matching","display_name":"Pattern matching","score":0.38040509819984436},{"id":"https://openalex.org/keywords/string-searching-algorithm","display_name":"String searching algorithm","score":0.36413100361824036},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33157169818878174},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.2747276723384857},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2395561933517456}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8201067447662354},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.7628373503684998},{"id":"https://openalex.org/C100903775","wikidata":"https://www.wikidata.org/wiki/Q5157028","display_name":"Compressed suffix array","level":4,"score":0.630513608455658},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5504305958747864},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5238925218582153},{"id":"https://openalex.org/C130590232","wikidata":"https://www.wikidata.org/wiki/Q1671754","display_name":"Inverted index","level":3,"score":0.5027623176574707},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.45040163397789},{"id":"https://openalex.org/C2781166958","wikidata":"https://www.wikidata.org/wiki/Q1426863","display_name":"Suffix tree","level":3,"score":0.41211143136024475},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.38040509819984436},{"id":"https://openalex.org/C7757238","wikidata":"https://www.wikidata.org/wiki/Q374040","display_name":"String searching algorithm","level":3,"score":0.36413100361824036},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33157169818878174},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2747276723384857},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2395561933517456},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2021.3114401","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2021.3114401","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1054641142","display_name":"\u5927\u89c4\u6a21\u5e8f\u5217\u6570\u636e\u96c6\u7684\u538b\u7f29\u7d22\u5f15\u4e0e\u641c\u7d22\u7b97\u6cd5\u7814\u7a76","funder_award_id":"61373044","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1100340133","display_name":null,"funder_award_id":"62272358","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1492127831","https://openalex.org/W1493802692","https://openalex.org/W1504934817","https://openalex.org/W1532491137","https://openalex.org/W1537923221","https://openalex.org/W1560609674","https://openalex.org/W1969046465","https://openalex.org/W1969173824","https://openalex.org/W1985108724","https://openalex.org/W2013697960","https://openalex.org/W2038647778","https://openalex.org/W2040818482","https://openalex.org/W2046038806","https://openalex.org/W2049415039","https://openalex.org/W2056707490","https://openalex.org/W2059513841","https://openalex.org/W2072021854","https://openalex.org/W2077091042","https://openalex.org/W2088386938","https://openalex.org/W2090021115","https://openalex.org/W2100173929","https://openalex.org/W2107082304","https://openalex.org/W2110007746","https://openalex.org/W2110327674","https://openalex.org/W2121252285","https://openalex.org/W2128777897","https://openalex.org/W2134696992","https://openalex.org/W2158322625","https://openalex.org/W2158874082","https://openalex.org/W2159647614","https://openalex.org/W2161488606","https://openalex.org/W2165734724","https://openalex.org/W2170899819","https://openalex.org/W2291149938","https://openalex.org/W2540753323","https://openalex.org/W2564080404","https://openalex.org/W2566822412","https://openalex.org/W2736438468","https://openalex.org/W2811012751","https://openalex.org/W2883331833","https://openalex.org/W2963429594","https://openalex.org/W2964320652","https://openalex.org/W2991364609","https://openalex.org/W3001034374","https://openalex.org/W3200390843","https://openalex.org/W4230945265","https://openalex.org/W4247794781","https://openalex.org/W6654031322","https://openalex.org/W6679642144","https://openalex.org/W6683504902","https://openalex.org/W6692685519"],"related_works":["https://openalex.org/W2124852384","https://openalex.org/W1807007002","https://openalex.org/W3044224375","https://openalex.org/W1848961842","https://openalex.org/W2410393273","https://openalex.org/W2045266738","https://openalex.org/W2355168454","https://openalex.org/W1562888603","https://openalex.org/W4234430975","https://openalex.org/W2070949798"],"abstract_inverted_index":{"Compressed":[0],"self-indexes":[1],"are":[2],"used":[3],"widely":[4],"in":[5,35,71,108,123,135,176],"string":[6],"processing":[7],"applications,":[8,68],"such":[9,69],"as":[10,70],"information":[11],"retrieval,":[12],"genome":[13,72],"analysis,":[14,73],"data":[15,43,165],"mining,":[16],"and":[17,32,41,85,90,137,179],"web":[18],"searching.":[19],"The":[20],"index":[21,40,170],"not":[22,77],"only":[23],"indexes":[24],"the":[25,30,39,42,55,79,124,127,133,157,163,168,183,189],"data,":[26,31],"but":[27],"also":[28],"encodes":[29,45],"it":[33,44],"is":[34],"compressed":[36,83,116],"form.":[37],"Moreover,":[38],"can":[46,147],"be":[47],"operated":[48],"upon":[49],"directly,":[50],"without":[51],"need":[52],"to":[53],"uncompress":[54],"entire":[56],"index,":[57],"thus":[58,86],"saving":[59],"time":[60,150,181],"while":[61],"maintaining":[62],"small":[63],"storage":[64],"space.":[65],"In":[66,94],"some":[67],"existing":[74],"methods":[75],"do":[76],"exploit":[78],"full":[80],"possibilities":[81],"of":[82,115,126,139,156],"self-indexes,":[84],"we":[87,97],"seek":[88],"faster":[89],"more":[91],"space-efficient":[92],"indexes.":[93],"this":[95],"paper,":[96],"propose":[98],"a":[99,109,120],"practical":[100,113],"high-order":[101],"entropy-compressed":[102],"self-index":[103],"for":[104],"efficient":[105],"pattern":[106],"matching":[107],"text.":[110],"We":[111,131,146],"give":[112],"implementations":[114],"suffix":[117],"arrays":[118],"using":[119,152],"hybrid":[121],"encoding":[122],"representation":[125],"neighbor":[128,158],"function":[129],".":[130],"analyze":[132],"performance":[134],"theory":[136],"practice":[138],"our":[140],"recommended":[141],"indexing":[142,185],"method,":[143],"called":[144],"GeCSA.":[145],"improve":[148],"retrieval":[149,180],"further":[151],"an":[153],"iterated":[154],"version":[155],"function.":[159],"Experimental":[160],"results":[161],"on":[162,188],"tested":[164],"demonstrate":[166],"that":[167],"proposed":[169],"GeCSA":[171],"has":[172],"good":[173],"overall":[174],"advantages":[175],"space":[177],"usage":[178],"over":[182],"state-of-the-art":[184],"methods,":[186],"especially":[187],"repetitive":[190],"data.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
