{"id":"https://openalex.org/W2951846000","doi":"https://doi.org/10.1093/bioinformatics/btz268","title":"A framework for space-efficient variable-order Markov models","display_name":"A framework for space-efficient variable-order Markov models","publication_year":2019,"publication_date":"2019-04-10","ids":{"openalex":"https://openalex.org/W2951846000","doi":"https://doi.org/10.1093/bioinformatics/btz268","mag":"2951846000","pmid":"https://pubmed.ncbi.nlm.nih.gov/31004473"},"language":"en","primary_location":{"id":"doi:10.1093/bioinformatics/btz268","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btz268","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007639641","display_name":"Fabio Cunial","orcid":"https://orcid.org/0000-0003-0282-5738"},"institutions":[{"id":"https://openalex.org/I4210148197","display_name":"Center for Systems Biology Dresden","ror":"https://ror.org/05hrn3e05","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I149899117","https://openalex.org/I4210111875","https://openalex.org/I4210148197","https://openalex.org/I4210159854","https://openalex.org/I78650965"]},{"id":"https://openalex.org/I4210159854","display_name":"Max Planck Institute of Molecular Cell Biology and Genetics","ror":"https://ror.org/05b8d3w18","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210159854"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Fabio Cunial","raw_affiliation_strings":["Max Planck Institute for Molecular Cell Biology and Genetics (MPI-CBG), and Center for Systems Biology Dresden (CSBD) , Dresden 01307, Germany"],"affiliations":[{"raw_affiliation_string":"Max Planck Institute for Molecular Cell Biology and Genetics (MPI-CBG), and Center for Systems Biology Dresden (CSBD) , Dresden 01307, Germany","institution_ids":["https://openalex.org/I4210148197","https://openalex.org/I4210159854"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067903348","display_name":"Jarno Alanko","orcid":"https://orcid.org/0000-0002-8003-9225"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jarno Alanko","raw_affiliation_strings":["Department of Computer Science, University of Helsinki , Helsinki 00014, Finland"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Helsinki , Helsinki 00014, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059502958","display_name":"Djamal Belazzougui","orcid":null},"institutions":[{"id":"https://openalex.org/I4210095777","display_name":"Centre de Recherche sur l'Information Scientifique et Technique","ror":"https://ror.org/01k1bte55","country_code":"DZ","type":"government","lineage":["https://openalex.org/I4210095777","https://openalex.org/I4210114810"]}],"countries":["DZ"],"is_corresponding":false,"raw_author_name":"Djamal Belazzougui","raw_affiliation_strings":["CAPA, DTISI, Centre de Recherche sur l'Information Scientifique et Technique , Algiers, Algeria"],"affiliations":[{"raw_affiliation_string":"CAPA, DTISI, Centre de Recherche sur l'Information Scientifique et Technique , Algiers, Algeria","institution_ids":["https://openalex.org/I4210095777"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007639641"],"corresponding_institution_ids":["https://openalex.org/I4210148197","https://openalex.org/I4210159854"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":null,"fwci":1.22894196,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.83611062,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"35","issue":"22","first_page":"4607","last_page":"4616"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7260691523551941},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.6725478172302246},{"id":"https://openalex.org/keywords/trie","display_name":"Trie","score":0.6091316342353821},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.5911362767219543},{"id":"https://openalex.org/keywords/variable","display_name":"Variable (mathematics)","score":0.5309593081474304},{"id":"https://openalex.org/keywords/suffix","display_name":"Suffix","score":0.4506365656852722},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.44708070158958435},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.43093952536582947},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.42968642711639404},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4288511276245117},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4168773293495178},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.4113059937953949},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38174116611480713},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.3748103678226471},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2521877884864807},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21805214881896973}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7260691523551941},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.6725478172302246},{"id":"https://openalex.org/C190290938","wikidata":"https://www.wikidata.org/wiki/Q387015","display_name":"Trie","level":3,"score":0.6091316342353821},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.5911362767219543},{"id":"https://openalex.org/C182365436","wikidata":"https://www.wikidata.org/wiki/Q50701","display_name":"Variable (mathematics)","level":2,"score":0.5309593081474304},{"id":"https://openalex.org/C2779804580","wikidata":"https://www.wikidata.org/wiki/Q102047","display_name":"Suffix","level":2,"score":0.4506365656852722},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.44708070158958435},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.43093952536582947},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.42968642711639404},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4288511276245117},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4168773293495178},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.4113059937953949},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38174116611480713},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.3748103678226471},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2521877884864807},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21805214881896973},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011336","descriptor_name":"Probability","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1093/bioinformatics/btz268","is_oa":false,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btz268","pdf_url":null,"source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":false,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics","raw_type":"journal-article"},{"id":"pmid:31004473","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/31004473","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Bioinformatics (Oxford, England)","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1499032625","https://openalex.org/W1525720445","https://openalex.org/W1541794469","https://openalex.org/W1551373730","https://openalex.org/W1573352354","https://openalex.org/W1577336816","https://openalex.org/W1795489290","https://openalex.org/W1878541814","https://openalex.org/W1962019683","https://openalex.org/W1969867523","https://openalex.org/W1994920552","https://openalex.org/W2004083484","https://openalex.org/W2011485152","https://openalex.org/W2015622114","https://openalex.org/W2032870665","https://openalex.org/W2074229180","https://openalex.org/W2082967074","https://openalex.org/W2089319476","https://openalex.org/W2095963120","https://openalex.org/W2096556032","https://openalex.org/W2096669041","https://openalex.org/W2100423944","https://openalex.org/W2103960658","https://openalex.org/W2105199251","https://openalex.org/W2107903949","https://openalex.org/W2108228587","https://openalex.org/W2113641473","https://openalex.org/W2114257671","https://openalex.org/W2114499853","https://openalex.org/W2116190532","https://openalex.org/W2117619142","https://openalex.org/W2119047110","https://openalex.org/W2122766688","https://openalex.org/W2124195715","https://openalex.org/W2129995544","https://openalex.org/W2131415145","https://openalex.org/W2133253129","https://openalex.org/W2136209011","https://openalex.org/W2142195237","https://openalex.org/W2146162567","https://openalex.org/W2153990926","https://openalex.org/W2158195707","https://openalex.org/W2161628678","https://openalex.org/W2162039942","https://openalex.org/W2163166295","https://openalex.org/W2163294786","https://openalex.org/W2171560765","https://openalex.org/W2172078094","https://openalex.org/W2301207708","https://openalex.org/W2532463985","https://openalex.org/W2553985332","https://openalex.org/W2611814747","https://openalex.org/W2963766645","https://openalex.org/W3104314953","https://openalex.org/W4229830074","https://openalex.org/W4230932396","https://openalex.org/W4234588445","https://openalex.org/W6629817413","https://openalex.org/W6633216439","https://openalex.org/W6638466415","https://openalex.org/W6641201905","https://openalex.org/W6675016256"],"related_works":["https://openalex.org/W3157818565","https://openalex.org/W4220850540","https://openalex.org/W2046569047","https://openalex.org/W2186419898","https://openalex.org/W2048294592","https://openalex.org/W2003608043","https://openalex.org/W2370961680","https://openalex.org/W2278452282","https://openalex.org/W2156393489","https://openalex.org/W1552925710"],"abstract_inverted_index":{"Abstract":[0],"Motivation":[1],"Markov":[2,63,68,232],"models":[3,24,64,233],"with":[4,19,237,241],"contexts":[5,196,243],"of":[6,17,61,66,75,103,108,127,151,159,195,205],"variable":[7],"length":[8,107,192],"are":[9,31,45,52,214,261],"widely":[10],"used":[11,236],"in":[12],"bioinformatics":[13],"for":[14,47],"representing":[15],"sets":[16],"sequences":[18],"similar":[20],"biological":[21],"properties.":[22],"When":[23],"contain":[25],"many":[26],"long":[27],"contexts,":[28,109],"existing":[29],"implementations":[30,96,177,221],"either":[32],"unable":[33],"to":[34,89,112,139,144,148,157,171,187,197,203,234],"handle":[35],"genome-scale":[36],"training":[37,153],"datasets":[38,239],"within":[39],"typical":[40],"memory":[41],"budgets,":[42],"or":[43,121,208,227],"they":[44],"optimized":[46],"specific":[48],"model":[49],"variants":[50],"and":[51,65,83,85,106,110,166,193,240,254],"thus":[53,248],"inflexible.":[54],"Results":[55],"We":[56,136],"provide":[57],"practical,":[58],"versatile":[59],"representations":[60],"variable-order":[62,231],"interpolated":[67],"models,":[69],"that":[70,86,213],"support":[71],"a":[72,145,215],"large":[73],"number":[74,105],"context-selection":[76],"criteria,":[77],"scoring":[78],"functions,":[79],"probability":[80],"smoothing":[81],"methods,":[82],"interpolations,":[84],"take":[87],"up":[88,111,156,170],"four":[90],"times":[91,114,173,217],"less":[92,115],"space":[93,116,161],"than":[94,117,175,219],"previous":[95,118,176,220],"based":[97,178,222],"on":[98,162,179,190,223,244],"the":[99,104,125,149,152,180,191,224,245],"suffix":[100,181,225],"array,":[101,226],"regardless":[102],"ten":[113],"trie-based":[119],"representations,":[120],"more,":[122,209],"while":[123],"matching":[124],"size":[126,207],"related,":[128],"state-of-the-art":[129],"data":[130,211,260],"structures":[131,212],"from":[132],"Natural":[133],"Language":[134],"Processing.":[135],"describe":[137],"how":[138,186],"further":[140,198],"compress":[141],"our":[142,200],"indexes":[143,202],"quantity":[146],"related":[147],"redundancy":[150],"data,":[154],"saving":[155],"90%":[158],"their":[160,206],"very":[163],"repetitive":[164],"datasets,":[165],"making":[167],"them":[168],"become":[169],"60":[172],"smaller":[174,218],"array.":[182],"Finally,":[183],"we":[184],"show":[185],"exploit":[188],"constraints":[189],"frequency":[194],"shrink":[199],"compressed":[201],"half":[204],"achieving":[210],"hundred":[216],"more.":[228],"This":[229],"allows":[230],"be":[235],"bigger":[238],"longer":[242],"same":[246],"hardware,":[247],"possibly":[249],"enabling":[250],"new":[251],"applications.":[252],"Availability":[253],"implementation":[255],"https://github.com/jnalanko/VOMM":[256],"Supplementary":[257,259],"information":[258],"available":[262],"at":[263],"Bioinformatics":[264],"online.":[265]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-23T05:10:03.516525","created_date":"2025-10-10T00:00:00"}
