{"id":"https://openalex.org/W4413499066","doi":"https://doi.org/10.1093/comjnl/bxaf102","title":"Engineering rank/select data structures for large-alphabet strings","display_name":"Engineering rank/select data structures for large-alphabet strings","publication_year":2025,"publication_date":"2025-08-04","ids":{"openalex":"https://openalex.org/W4413499066","doi":"https://doi.org/10.1093/comjnl/bxaf102"},"language":"en","primary_location":{"id":"doi:10.1093/comjnl/bxaf102","is_oa":false,"landing_page_url":"https://doi.org/10.1093/comjnl/bxaf102","pdf_url":null,"source":{"id":"https://openalex.org/S44643521","display_name":"The Computer Journal","issn_l":"0010-4620","issn":["0010-4620","1460-2067"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Computer Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050699073","display_name":"Diego Arroyuelo","orcid":"https://orcid.org/0000-0002-2509-8097"},"institutions":[{"id":"https://openalex.org/I4210163394","display_name":"Millennium Institute on Immunology and Immunotherapy","ror":"https://ror.org/05j6ybs54","country_code":"CL","type":"facility","lineage":["https://openalex.org/I4210163394"]}],"countries":["CL"],"is_corresponding":true,"raw_author_name":"Diego Arroyuelo","raw_affiliation_strings":["Department of Computer Science, Escuela de Ingenier\u00eda, Pontificia Universidad Cat\u00f3lica de Chile & Millennium Institute for Foundational Research on Data (IMFD) , Vicu\u00f1a Mackenna 4860, Santiago, 7820436 ,"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Escuela de Ingenier\u00eda, Pontificia Universidad Cat\u00f3lica de Chile & Millennium Institute for Foundational Research on Data (IMFD) , Vicu\u00f1a Mackenna 4860, Santiago, 7820436 ,","institution_ids":["https://openalex.org/I4210163394"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119412058","display_name":"Gabriel Carmona","orcid":"https://orcid.org/0009-0004-1454-2940"},"institutions":[{"id":"https://openalex.org/I108290504","display_name":"University of Pisa","ror":"https://ror.org/03ad39j10","country_code":"IT","type":"education","lineage":["https://openalex.org/I108290504"]},{"id":"https://openalex.org/I4210160382","display_name":"Istituto Nazionale di Fisica Nucleare, Sezione di Pisa","ror":"https://ror.org/05symbg58","country_code":"IT","type":"facility","lineage":["https://openalex.org/I160013858","https://openalex.org/I4210160382"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Gabriel Carmona","raw_affiliation_strings":["Department of Computer Science, University of Pisa , Largo Bruno Pontecorvo, 3, Pisa, 56127 ,"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Pisa , Largo Bruno Pontecorvo, 3, Pisa, 56127 ,","institution_ids":["https://openalex.org/I4210160382","https://openalex.org/I108290504"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092029810","display_name":"H\u00e9ctor Larra\u00f1aga","orcid":null},"institutions":[{"id":"https://openalex.org/I75778554","display_name":"Federico Santa Mar\u00eda Technical University","ror":"https://ror.org/05510vn56","country_code":"CL","type":"education","lineage":["https://openalex.org/I75778554"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"H\u00e9ctor Larra\u00f1aga","raw_affiliation_strings":["Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,","institution_ids":["https://openalex.org/I75778554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072037870","display_name":"Francisco Riveros","orcid":null},"institutions":[{"id":"https://openalex.org/I75778554","display_name":"Federico Santa Mar\u00eda Technical University","ror":"https://ror.org/05510vn56","country_code":"CL","type":"education","lineage":["https://openalex.org/I75778554"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Francisco Riveros","raw_affiliation_strings":["Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,","institution_ids":["https://openalex.org/I75778554"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119412059","display_name":"Carlos Eugenio Rojas-Morales","orcid":"https://orcid.org/0009-0008-3491-5053"},"institutions":[{"id":"https://openalex.org/I75778554","display_name":"Federico Santa Mar\u00eda Technical University","ror":"https://ror.org/05510vn56","country_code":"CL","type":"education","lineage":["https://openalex.org/I75778554"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Carlos Eugenio Rojas-Morales","raw_affiliation_strings":["Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,","institution_ids":["https://openalex.org/I75778554"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015457174","display_name":"Erick Sep\u00falveda","orcid":"https://orcid.org/0009-0004-9918-9540"},"institutions":[{"id":"https://openalex.org/I75778554","display_name":"Federico Santa Mar\u00eda Technical University","ror":"https://ror.org/05510vn56","country_code":"CL","type":"education","lineage":["https://openalex.org/I75778554"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Erick Sep\u00falveda","raw_affiliation_strings":["Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Universidad T\u00e9cnica Federico Santa Mar\u00eda , Vicu\u00f1a Mackenna 3939, Santiago, 8940897 ,","institution_ids":["https://openalex.org/I75778554"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5050699073"],"corresponding_institution_ids":["https://openalex.org/I4210163394"],"apc_list":{"value":2635,"currency":"GBP","value_usd":3232},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.114815,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"69","issue":"1","first_page":"108","last_page":"132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11567","display_name":"semigroups and automata theory","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/alphabet","display_name":"Alphabet","score":0.748771071434021},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.6951406002044678},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6839711666107178},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33766281604766846},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3291305601596832},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2136399745941162},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.1722082495689392},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.03647473454475403}],"concepts":[{"id":"https://openalex.org/C112876837","wikidata":"https://www.wikidata.org/wiki/Q837518","display_name":"Alphabet","level":2,"score":0.748771071434021},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.6951406002044678},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6839711666107178},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33766281604766846},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3291305601596832},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2136399745941162},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.1722082495689392},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.03647473454475403},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1093/comjnl/bxaf102","is_oa":false,"landing_page_url":"https://doi.org/10.1093/comjnl/bxaf102","pdf_url":null,"source":{"id":"https://openalex.org/S44643521","display_name":"The Computer Journal","issn_l":"0010-4620","issn":["0010-4620","1460-2067"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Computer Journal","raw_type":"journal-article"},{"id":"pmh:oai:arpi.unipi.it:11568/1342707","is_oa":false,"landing_page_url":"https://hdl.handle.net/11568/1342707","pdf_url":null,"source":{"id":"https://openalex.org/S4377196265","display_name":"CINECA IRIS Institutial research information system (University of Pisa)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I108290504","host_organization_name":"University of Pisa","host_organization_lineage":["https://openalex.org/I108290504"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W1485941238","https://openalex.org/W1493802692","https://openalex.org/W1798412263","https://openalex.org/W1844008849","https://openalex.org/W1929352279","https://openalex.org/W1949346071","https://openalex.org/W1965853364","https://openalex.org/W1967156765","https://openalex.org/W1973228454","https://openalex.org/W1974033543","https://openalex.org/W2038647778","https://openalex.org/W2077091042","https://openalex.org/W2086536051","https://openalex.org/W2086727654","https://openalex.org/W2097589086","https://openalex.org/W2100474856","https://openalex.org/W2107082304","https://openalex.org/W2115247329","https://openalex.org/W2121252285","https://openalex.org/W2158874082","https://openalex.org/W2159647614","https://openalex.org/W2161488606","https://openalex.org/W2164107415","https://openalex.org/W2323530184","https://openalex.org/W2406547600","https://openalex.org/W2533248932","https://openalex.org/W2604113568","https://openalex.org/W2805651586","https://openalex.org/W2889833303","https://openalex.org/W2952380754","https://openalex.org/W2963904870","https://openalex.org/W2977591134","https://openalex.org/W2984253873","https://openalex.org/W2999295646","https://openalex.org/W3093730611","https://openalex.org/W3197753456","https://openalex.org/W4221058058","https://openalex.org/W4233459951","https://openalex.org/W4237754109","https://openalex.org/W4244061993","https://openalex.org/W4247794781","https://openalex.org/W4249184094","https://openalex.org/W4312423556","https://openalex.org/W4386509124","https://openalex.org/W4389615663"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Abstract":[0],"Large-alphabet":[1],"strings,":[2,74],"prevalent":[3],"in":[4,65],"information":[5],"retrieval":[6],"and":[7,14,38,75,80],"natural":[8],"language":[9],"processing,":[10],"pose":[11],"unique":[12],"storage":[13],"processing":[15],"challenges.":[16],"This":[17],"paper":[18],"explores":[19],"the":[20,24,35,47,60,88,98],"efficient":[21],"implementation":[22,41],"of":[23,62,78,100],"alphabet-partition":[25],"approach,":[26],"introducing":[27],"a":[28],"compressed":[29,73,85],"data":[30,92],"structure":[31,64,93],"that":[32],"efficiently":[33],"supports":[34],"operations":[36],"${\\mathsf{rank}}$":[37,79],"${\\mathsf{select}}$.":[39,81],"Our":[40],"significantly":[42],"outperforms":[43],"existing":[44],"methods,":[45],"improving":[46],"${\\mathsf{select}}$":[48],"operation":[49],"speed":[50],"by":[51],"80%":[52],"with":[53],"only":[54,95],"11%":[55],"additional":[56],"space.":[57],"We":[58],"demonstrate":[59],"utility":[61],"our":[63,91],"various":[66],"applications,":[67],"including":[68],"inverted":[69],"list":[70],"intersections,":[71],"run-length":[72,84],"distributed":[76],"computation":[77],"Notably,":[82],"for":[83],"strings":[86],"using":[87],"Burrows\u2013Wheeler":[89],"transform,":[90],"requires":[94],"0.98\u20131.09":[96],"times":[97,106],"space":[99],"state-of-the-art":[101],"RLFM-indexes":[102],"to":[103],"achieve":[104],"1.23\u20132.33":[105],"faster":[107],"pattern":[108],"occurrence":[109],"counting":[110],"while":[111],"also":[112],"providing":[113],"better":[114],"theoretical":[115],"guarantees.":[116]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
