{"id":"https://openalex.org/W4294240425","doi":"https://doi.org/10.4230/lipics.cpm.2026.5","title":"Efficient Grammar Compression via RLZ-Based RePair","display_name":"Efficient Grammar Compression via RLZ-Based RePair","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W4294240425","doi":"https://doi.org/10.4230/lipics.cpm.2026.5"},"language":"en","primary_location":{"id":"pmh:oai:drops-oai.dagstuhl.de:25931","is_oa":true,"landing_page_url":"https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2026.5","pdf_url":null,"source":{"id":"https://openalex.org/S4377196569","display_name":"DROPS (Schloss Dagstuhl \u2013 Leibniz Center for Informatics)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2799853480","host_organization_name":"Schloss Dagstuhl \u2013 Leibniz Center for Informatics","host_organization_lineage":["https://openalex.org/I2799853480"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2026.5","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Varki, Rahul","orcid":"https://orcid.org/0009-0003-5721-9484"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Varki, Rahul","raw_affiliation_strings":["Department of Computer and Information Science and Engineering, University of Florida, Gainesville, FL, USA"],"raw_orcid":"https://orcid.org/0009-0003-5721-9484","affiliations":[{"raw_affiliation_string":"Department of Computer and Information Science and Engineering, University of Florida, Gainesville, FL, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gagie, Travis","orcid":"https://orcid.org/0000-0003-3689-327X"},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Gagie, Travis","raw_affiliation_strings":["Faculty of Computer Science, Dalhousie University, Halifax, Canada"],"raw_orcid":"https://orcid.org/0000-0003-3689-327X","affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Dalhousie University, Halifax, Canada","institution_ids":["https://openalex.org/I129902397"]}]},{"author_position":"last","author":{"id":null,"display_name":"Boucher, Christina","orcid":"https://orcid.org/0000-0001-9509-9725"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boucher, Christina","raw_affiliation_strings":["Department of Computer and Information Science and Engineering, University of Florida, Gainesville, FL, USA"],"raw_orcid":"https://orcid.org/0000-0001-9509-9725","affiliations":[{"raw_affiliation_string":"Department of Computer and Information Science and Engineering, University of Florida, Gainesville, FL, USA","institution_ids":["https://openalex.org/I33213144"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.00039224,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.7692912220954895},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.6065972447395325},{"id":"https://openalex.org/keywords/compression","display_name":"Compression (physics)","score":0.542137086391449},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5351601243019104},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.5165859460830688},{"id":"https://openalex.org/keywords/constant","display_name":"Constant (computer programming)","score":0.5117158889770508},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.502216100692749},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.48436078429222107},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4431428909301758},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.42738503217697144},{"id":"https://openalex.org/keywords/linear-space","display_name":"Linear space","score":0.41067320108413696},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4004727303981781},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38908809423446655},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.3623775839805603},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19328609108924866},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11803063750267029},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.07711440324783325},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07589426636695862}],"concepts":[{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.7692912220954895},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.6065972447395325},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.542137086391449},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5351601243019104},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.5165859460830688},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.5117158889770508},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.502216100692749},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.48436078429222107},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4431428909301758},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.42738503217697144},{"id":"https://openalex.org/C176370821","wikidata":"https://www.wikidata.org/wiki/Q1826459","display_name":"Linear space","level":2,"score":0.41067320108413696},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4004727303981781},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38908809423446655},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.3623775839805603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19328609108924866},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11803063750267029},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.07711440324783325},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07589426636695862},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:drops-oai.dagstuhl.de:25931","is_oa":true,"landing_page_url":"https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2026.5","pdf_url":null,"source":{"id":"https://openalex.org/S4377196569","display_name":"DROPS (Schloss Dagstuhl \u2013 Leibniz Center for Informatics)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2799853480","host_organization_name":"Schloss Dagstuhl \u2013 Leibniz Center for Informatics","host_organization_lineage":["https://openalex.org/I2799853480"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"},{"id":"doi:10.4230/lipics.cpm.2026.5","is_oa":true,"landing_page_url":"https://doi.org/10.4230/lipics.cpm.2026.5","pdf_url":null,"source":{"id":"https://openalex.org/S7407052059","display_name":"Dagstuhl Research Online Publication Server","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"pmh:oai:drops-oai.dagstuhl.de:25931","is_oa":true,"landing_page_url":"https://drops.dagstuhl.de/entities/document/10.4230/LIPIcs.CPM.2026.5","pdf_url":null,"source":{"id":"https://openalex.org/S4377196569","display_name":"DROPS (Schloss Dagstuhl \u2013 Leibniz Center for Informatics)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2799853480","host_organization_name":"Schloss Dagstuhl \u2013 Leibniz Center for Informatics","host_organization_lineage":["https://openalex.org/I2799853480"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5199999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4235381733","https://openalex.org/W2355022049","https://openalex.org/W2060429446","https://openalex.org/W2741782512","https://openalex.org/W3011302839","https://openalex.org/W2392958391","https://openalex.org/W3155227409","https://openalex.org/W2386709048","https://openalex.org/W2898682874","https://openalex.org/W2115360030"],"abstract_inverted_index":{"Among":[0],"grammar-based":[1,200],"compression":[2],"techniques,":[3],"RePair":[4,97,128,144,195],"is":[5,133,185,203],"a":[6,59,69,78,96,174,199],"notable":[7],"offline":[8],"encoding":[9,64],"scheme":[10,65],"known":[11,32],"for":[12,206],"its":[13],"simplicity":[14],"and":[15,61,151,209],"powerful":[16],"combinatorial":[17],"properties,":[18],"producing":[19],"compact":[20],"grammars":[21],"by":[22,167],"repeatedly":[23],"replacing":[24],"the":[25,47,100,104,107,120,123,127,131,139,152,188,212],"most":[26],"frequent":[27],"adjacent":[28],"pairs":[29],"of":[30,73,103,109,122,154,187,215],"symbols,":[31],"as":[33,44,142],"bigrams.":[34],"However,":[35],"RePair\u2019s":[36],"memory":[37,149,165],"usage":[38,150,166],"scales":[39],"poorly":[40],"with":[41],"input":[42],"size,":[43],"it":[45,82],"loads":[46],"entire":[48],"text":[49,70],"into":[50],"memory.":[51],"In":[52,157],"contrast,":[53],"Relative":[54],"Lempel-Ziv":[55],"(RLZ)":[56],"parsing":[57],"offers":[58],"scalable":[60,190],"lightweight":[62],"online":[63],"that":[66,75,94,161,192,202],"losslessly":[67],"represents":[68],"in":[71,198],"terms":[72],"phrases":[74,125],"refer":[76],"to":[77,85,179,211],"reference":[79,132],"string,":[80],"but":[81],"often":[83],"fails":[84],"expose":[86],"deeper":[87],"structural":[88],"patterns.":[89],"We":[90],"introduce":[91],"an":[92],"algorithm":[93],"produces":[95],"grammar":[98,141],"from":[99],"RLZ":[101,124],"parse":[102],"input,":[105],"leveraging":[106],"strengths":[108],"both":[110,148,204],"methods.":[111],"Our":[112],"method,":[113],"RLZ-RePair,":[114],"performs":[115],"bigram":[116,155],"replacements":[117],"systematically,":[118],"preserving":[119],"integrity":[121],"throughout":[126],"iterations.":[129],"When":[130],"well":[134],"chosen,":[135],"our":[136,182],"method":[137],"achieves":[138],"same":[140],"standard":[143],"while":[145,171],"significantly":[146],"reducing":[147],"number":[153],"replacements.":[156],"particular,":[158],"we":[159],"show":[160],"RLZ-RePair":[162,184],"can":[163],"reduce":[164],"more":[168],"than":[169],"80%":[170],"incurring":[172],"only":[173],"modest":[175],"runtime":[176],"increase":[177],"compared":[178],"RePair.":[180,216],"To":[181],"knowledge,":[183],"one":[186],"first":[189],"methods":[191],"constructs":[193],"exact":[194],"grammars,":[196],"resulting":[197],"compressor":[201],"practical":[205],"large":[207],"datasets":[208],"faithful":[210],"theoretical":[213],"elegance":[214]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2022-09-02T00:00:00"}
