{"id":"https://openalex.org/W2951846000","doi":"https://doi.org/10.1093/bioinformatics/btz268","title":"A framework for space-efficient variable-order Markov models","display_name":"A framework for space-efficient variable-order Markov models","publication_year":2019,"publication_date":"2019-04-10","ids":{"openalex":"https://openalex.org/W2951846000","doi":"https://doi.org/10.1093/bioinformatics/btz268","mag":"2951846000","pmid":"https://pubmed.ncbi.nlm.nih.gov/31004473"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/btz268","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btz268","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007639641","display_name":"Fabio Cunial","orcid":"https://orcid.org/0000-0003-0282-5738"},"institutions":[{"id":"https://openalex.org/I4210148197","display_name":"Center for Systems Biology Dresden","ror":"https://ror.org/05hrn3e05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I149899117","https://openalex.org/I4210111875","https://openalex.org/I4210148197","https://openalex.org/I4210159854","https://openalex.org/I78650965"]},{"id":"https://openalex.org/I4210159854","display_name":"Max Planck Institute of Molecular Cell Biology and Genetics","ror":"https://ror.org/05b8d3w18","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210159854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Fabio Cunial","raw_affiliation_strings":["Max Planck Institute for Molecular Cell Biology and Genetics (MPI-CBG), and Center for Systems Biology Dresden (CSBD) , Dresden 01307, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Molecular Cell Biology and Genetics (MPI-CBG), and Center for Systems Biology Dresden (CSBD) , Dresden 01307, Germany","institution_ids":["https://openalex.org/I4210148197","https://openalex.org/I4210159854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067903348","display_name":"Jarno Alanko","orcid":"https://orcid.org/0000-0002-8003-9225"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jarno Alanko","raw_affiliation_strings":["Department of Computer Science, University of Helsinki , Helsinki 00014, Finland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki , Helsinki 00014, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059502958","display_name":"Djamal Belazzougui","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095777","display_name":"Centre de Recherche sur l'Information Scientifique et Technique","ror":"https://ror.org/01k1bte55","country_code":"DZ","type":"government","lineage":["https://openalex.org/I4210095777","https://openalex.org/I4210114810"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Djamal Belazzougui","raw_affiliation_strings":["CAPA, DTISI, Centre de Recherche sur l'Information Scientifique et Technique , Algiers, Algeria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CAPA, DTISI, Centre de Recherche sur l'Information Scientifique et Technique , Algiers, Algeria","institution_ids":["https://openalex.org/I4210095777"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007639641"],"corresponding_institution_ids":["https://openalex.org/I4210148197","https://openalex.org/I4210159854"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":null,"fwci":1.1607,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.84164481,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"35","issue":"22","first_page":"4607","last_page":"4616"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.2653999924659729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.2653999924659729,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.13930000364780426,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.08169999718666077,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7260691523551941},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.6725478172302246},{"id":"https://openalex.org/keywords/trie","display_name":"Trie","score":0.6091316342353821},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.5911362767219543},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.5309593081474304},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.4506365656852722},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.44708070158958435},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.43093952536582947},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.42968642711639404},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4288511276245117},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4168773293495178},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.4113059937953949},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38174116611480713},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.3748103678226471},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2521877884864807},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21805214881896973}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7260691523551941},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.6725478172302246},{"id":"https://openalex.org/C190290938","wikidata":"https://www.wikidata.org/wiki/Q387015","display_name":"Trie","level":3,"score":0.6091316342353821},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.5911362767219543},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.5309593081474304},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.4506365656852722},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.44708070158958435},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.43093952536582947},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.42968642711639404},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4288511276245117},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4168773293495178},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.4113059937953949},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38174116611480713},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3748103678226471},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2521877884864807},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21805214881896973},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1093/bioinformatics/btz268","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btz268","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:31004473","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/31004473","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1499032625","https://openalex.org/W1525720445","https://openalex.org/W1541794469","https://openalex.org/W1551373730","https://openalex.org/W1573352354","https://openalex.org/W1577336816","https://openalex.org/W1795489290","https://openalex.org/W1878541814","https://openalex.org/W1962019683","https://openalex.org/W1969867523","https://openalex.org/W1994920552","https://openalex.org/W2004083484","https://openalex.org/W2011485152","https://openalex.org/W2015622114","https://openalex.org/W2032870665","https://openalex.org/W2074229180","https://openalex.org/W2082967074","https://openalex.org/W2089319476","https://openalex.org/W2095963120","https://openalex.org/W2096556032","https://openalex.org/W2096669041","https://openalex.org/W2100423944","https://openalex.org/W2103960658","https://openalex.org/W2105199251","https://openalex.org/W2107903949","https://openalex.org/W2108228587","https://openalex.org/W2113641473","https://openalex.org/W2114257671","https://openalex.org/W2114499853","https://openalex.org/W2116190532","https://openalex.org/W2117619142","https://openalex.org/W2119047110","https://openalex.org/W2122766688","https://openalex.org/W2124195715","https://openalex.org/W2129995544","https://openalex.org/W2131415145","https://openalex.org/W2133253129","https://openalex.org/W2136209011","https://openalex.org/W2142195237","https://openalex.org/W2146162567","https://openalex.org/W2153990926","https://openalex.org/W2158195707","https://openalex.org/W2161628678","https://openalex.org/W2162039942","https://openalex.org/W2163166295","https://openalex.org/W2163294786","https://openalex.org/W2171560765","https://openalex.org/W2172078094","https://openalex.org/W2301207708","https://openalex.org/W2532463985","https://openalex.org/W2553985332","https://openalex.org/W2611814747","https://openalex.org/W2963766645","https://openalex.org/W3104314953","https://openalex.org/W4229830074","https://openalex.org/W4230932396","https://openalex.org/W4234588445","https://openalex.org/W6629817413","https://openalex.org/W6633216439","https://openalex.org/W6638466415","https://openalex.org/W6641201905","https://openalex.org/W6675016256"],"related_works":["https://openalex.org/W3157818565","https://openalex.org/W4220850540","https://openalex.org/W2046569047","https://openalex.org/W2186419898","https://openalex.org/W2048294592","https://openalex.org/W2003608043","https://openalex.org/W2370961680","https://openalex.org/W2278452282","https://openalex.org/W2156393489","https://openalex.org/W1552925710"],"abstract_inverted_index":{"MOTIVATION:":[0],"Markov":[1,62,67,231],"models":[2,23,63,232],"with":[3,18,236,240],"contexts":[4,195,242],"of":[5,16,60,65,74,102,107,126,150,158,194,204],"variable":[6],"length":[7,106,191],"are":[8,30,44,51,213,260],"widely":[9],"used":[10,235],"in":[11],"bioinformatics":[12],"for":[13,46],"representing":[14],"sets":[15],"sequences":[17],"similar":[19],"biological":[20],"properties.":[21],"When":[22],"contain":[24],"many":[25],"long":[26],"contexts,":[27,108],"existing":[28],"implementations":[29,95,176,220],"either":[31],"unable":[32],"to":[33,88,111,138,143,147,156,170,186,196,202,233],"handle":[34],"genome-scale":[35],"training":[36,152],"datasets":[37,238],"within":[38],"typical":[39],"memory":[40],"budgets,":[41],"or":[42,120,207,226],"they":[43],"optimized":[45],"specific":[47],"model":[48],"variants":[49],"and":[50,64,82,84,105,109,165,192,239],"thus":[52,247],"inflexible.":[53],"RESULTS:":[54],"We":[55,135],"provide":[56],"practical,":[57],"versatile":[58],"representations":[59],"variable-order":[61,230],"interpolated":[66],"models,":[68],"that":[69,85,212],"support":[70],"a":[71,144,214],"large":[72],"number":[73,104],"context-selection":[75],"criteria,":[76],"scoring":[77],"functions,":[78],"probability":[79],"smoothing":[80],"methods,":[81],"interpolations,":[83],"take":[86],"up":[87,110,155,169],"four":[89],"times":[90,113,172,216],"less":[91,114],"space":[92,115,160],"than":[93,116,174,218],"previous":[94,117,175,219],"based":[96,177,221],"on":[97,161,178,189,222,243],"the":[98,103,124,148,151,179,190,223,244],"suffix":[99,180,224],"array,":[100,225],"regardless":[101],"ten":[112],"trie-based":[118],"representations,":[119],"more,":[121,208],"while":[122],"matching":[123],"size":[125,206],"related,":[127],"state-of-the-art":[128],"data":[129,210,259],"structures":[130,211],"from":[131],"Natural":[132],"Language":[133],"Processing.":[134],"describe":[136],"how":[137,185],"further":[139,197],"compress":[140],"our":[141,199],"indexes":[142,201],"quantity":[145],"related":[146],"redundancy":[149],"data,":[153],"saving":[154],"90%":[157],"their":[159,205],"very":[162],"repetitive":[163],"datasets,":[164],"making":[166],"them":[167],"become":[168],"60":[171],"smaller":[173,217],"array.":[181],"Finally,":[182],"we":[183],"show":[184],"exploit":[187],"constraints":[188],"frequency":[193],"shrink":[198],"compressed":[200],"half":[203],"achieving":[209],"hundred":[215],"more.":[227],"This":[228],"allows":[229],"be":[234],"bigger":[237],"longer":[241],"same":[245],"hardware,":[246],"possibly":[248],"enabling":[249],"new":[250],"applications.":[251],"AVAILABILITY":[252],"AND":[253],"IMPLEMENTATION:":[254],"https://github.com/jnalanko/VOMM.":[255],"SUPPLEMENTARY":[256],"INFORMATION:":[257],"Supplementary":[258],"available":[261],"at":[262],"Bioinformatics":[263],"online.":[264]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
