{"id":"https://openalex.org/W4388869936","doi":"https://doi.org/10.1089/cmb.2024.0544","title":"Sketching Methods with Small Window Guarantee Using Minimum Decycling Sets","display_name":"Sketching Methods with Small Window Guarantee Using Minimum Decycling Sets","publication_year":2024,"publication_date":"2024-07-01","ids":{"openalex":"https://openalex.org/W4388869936","doi":"https://doi.org/10.1089/cmb.2024.0544","pmid":"https://pubmed.ncbi.nlm.nih.gov/37986724"},"language":"en","primary_location":{"id":"doi:10.1089/cmb.2024.0544","is_oa":true,"landing_page_url":"https://doi.org/10.1089/cmb.2024.0544","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1089/cmb.2024.0544","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009407234","display_name":"Guillaume Mar\u00e7ais","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Guillaume Mar\u00e7ais","raw_affiliation_strings":["Ray and Stephanie Lane Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"],"affiliations":[{"raw_affiliation_string":"Ray and Stephanie Lane Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014004628","display_name":"Dan DeBlasio","orcid":"https://orcid.org/0000-0003-4110-4431"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan DeBlasio","raw_affiliation_strings":["Ray and Stephanie Lane Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"],"affiliations":[{"raw_affiliation_string":"Ray and Stephanie Lane Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113653378","display_name":"Carl Kingsford","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carl Kingsford","raw_affiliation_strings":["Ray and Stephanie Lane Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA"],"affiliations":[{"raw_affiliation_string":"Ray and Stephanie Lane Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009407234"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.913,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.71561437,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"31","issue":"7","first_page":"597","last_page":"615"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10521","display_name":"RNA and protein synthesis mechanisms","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10207","display_name":"Advanced biosensing and bioanalysis techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.7458690404891968},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.7395737767219543},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6940141320228577},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.671227753162384},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5684071779251099},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.5179859399795532},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5136389136314392},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.5089375972747803},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.48821309208869934},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4199843108654022},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21354049444198608},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09546893835067749}],"concepts":[{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.7458690404891968},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.7395737767219543},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6940141320228577},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.671227753162384},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5684071779251099},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.5179859399795532},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5136389136314392},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.5089375972747803},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.48821309208869934},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4199843108654022},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21354049444198608},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09546893835067749},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1089/cmb.2024.0544","is_oa":true,"landing_page_url":"https://doi.org/10.1089/cmb.2024.0544","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computational Biology","raw_type":"journal-article"},{"id":"pmid:37986724","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37986724","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ArXiv","raw_type":null},{"id":"pmid:38980804","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/38980804","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of computational biology : a journal of computational molecular cell biology","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10659450","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10659450","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10659450/pdf/nihpp-2311.03592v1.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ArXiv","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:11304339","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11304339","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11304339/pdf/cmb.2024.0544.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Comput Biol","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1089/cmb.2024.0544","is_oa":true,"landing_page_url":"https://doi.org/10.1089/cmb.2024.0544","pdf_url":null,"source":{"id":"https://openalex.org/S78571599","display_name":"Journal of Computational Biology","issn_l":"1066-5277","issn":["1066-5277","1557-8666"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320443","host_organization_name":"Mary Ann Liebert, Inc.","host_organization_lineage":["https://openalex.org/P4310320443"],"host_organization_lineage_names":["Mary Ann Liebert, Inc."],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computational Biology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1310392732","display_name":null,"funder_award_id":"DBI-1937540","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3788099519","display_name":null,"funder_award_id":"R01HG012470","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3858817854","display_name":null,"funder_award_id":"III-2232121","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6667550415","display_name":null,"funder_award_id":"1937540","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7686337660","display_name":null,"funder_award_id":"DBI-1937540","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8144893449","display_name":null,"funder_award_id":"2232121","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1972418517","https://openalex.org/W1980664567","https://openalex.org/W1987535905","https://openalex.org/W1988388271","https://openalex.org/W2010454899","https://openalex.org/W2039516104","https://openalex.org/W2058219874","https://openalex.org/W2060884261","https://openalex.org/W2111295912","https://openalex.org/W2144560237","https://openalex.org/W2487384794","https://openalex.org/W2763390627","https://openalex.org/W2950572599","https://openalex.org/W2951822379","https://openalex.org/W2973021521","https://openalex.org/W3016701095","https://openalex.org/W3023465514","https://openalex.org/W3042377559","https://openalex.org/W3043768949","https://openalex.org/W3117679888","https://openalex.org/W3128964673","https://openalex.org/W3182040180","https://openalex.org/W3200242814","https://openalex.org/W4243450674","https://openalex.org/W4285283141","https://openalex.org/W4295443308","https://openalex.org/W4307454143","https://openalex.org/W4307475426","https://openalex.org/W4385708519","https://openalex.org/W4386022705","https://openalex.org/W4386272000"],"related_works":["https://openalex.org/W2378994405","https://openalex.org/W2385974820","https://openalex.org/W2373478030","https://openalex.org/W2378679551","https://openalex.org/W3149739944","https://openalex.org/W2392363776","https://openalex.org/W2063051341","https://openalex.org/W2591066345","https://openalex.org/W1494563618","https://openalex.org/W2357022711"],"abstract_inverted_index":{"Most":[0],"sequence":[1,27,36,80,104],"sketching":[2,38,111,162,183,215],"methods":[3,39,175,216],"work":[4],"by":[5,141,164,242],"selecting":[6],"specific":[7],"<i>k</i>-mers":[8,167],"from":[9,147,168],"sequences":[10,17],"so":[11],"that":[12,65,98,293],"the":[13,23,44,59,72,75,93,103,108,126,152,166,169,186,278,294,305],"similarity":[14,28],"between":[15],"two":[16,240,251],"can":[18],"be":[19],"estimated":[20],"using":[21,32,35,52,66,79],"only":[22],"sketches.":[24],"Because":[25],"estimating":[26],"is":[29,131,191,208],"much":[30,192],"faster":[31],"sketches":[33,53],"than":[34],"alignment,":[37],"are":[40,88,230,246,256,297,321],"used":[41],"to":[42,63,121,212,249,269,276,283,299,318],"reduce":[43],"computational":[45,48,314],"requirements":[46],"of":[47,58,74,85,95,102,125,134,178,182,188,231,235,261,280,311],"biology":[49],"software.":[50],"Applications":[51],"often":[54],"rely":[55],"on":[56],"properties":[57,87],"<i>k</i>-mer":[60,146],"selection":[61],"procedure":[62],"ensure":[64],"a":[67,114,122,132,145,156,161,179,209,258,266],"sketch":[68],"does":[69],"not":[70],"degrade":[71],"quality":[73],"results":[76],"compared":[77],"with":[78,113,200,217,221],"alignment.":[81],"Two":[82],"important":[83],"examples":[84],"such":[86],"locality":[89],"and":[90,194,244,282,313,315],"window":[91,115,223],"guarantees,":[92],"latter":[94],"which":[96,130],"ensures":[97],"no":[99],"long":[100,138],"region":[101],"goes":[105],"unrepresented":[106],"in":[107],"sketch.":[109],"A":[110,309],"method":[112,163,184,268,273],"guarantee,":[116],"implicitly":[117],"or":[118],"explicitly,":[119],"corresponds":[120],"<i>decycling":[123],"set</i>":[124],"de":[127],"Bruijn":[128],"graph,":[129],"set":[133,150,158,170],"unavoidable":[135,153],"<i>k</i>-mers.":[136],"Any":[137],"enough":[139],"sequence,":[140],"definition,":[142],"must":[143],"contain":[144],"any":[148],"decycling":[149,157,189,198],"(hence,":[151],"property).":[154],"Conversely,":[155],"also":[159],"defines":[160],"choosing":[165],"as":[171],"representatives.":[172],"Although":[173],"current":[174],"use":[176],"one":[177,275,302],"small":[180,204,222],"number":[181,260,310],"families,":[185],"space":[187,279],"sets":[190,199,296],"larger":[193],"largely":[195],"unexplored.":[196],"Finding":[197],"desirable":[201,288],"characteristics":[202],"(e.g.,":[203,220],"remaining":[205,306],"path":[206,307],"length)":[207],"promising":[210],"approach":[211],"discovering":[213],"new":[214],"improved":[218],"performance":[219],"guarantee).":[224],"The":[225],"<i>Minimum":[226],"Decycling":[227],"Sets</i>":[228],"(MDSs)":[229],"particular":[232,252,303],"interest":[233],"because":[234],"their":[236],"minimum":[237],"size.":[238],"Only":[239],"algorithms,":[241],"Mykkeltveit":[243,295],"Champarnaud,":[245],"previously":[247],"known":[248],"generate":[250],"MDSs,":[253],"although":[254],"there":[255],"typically":[257],"vast":[259],"alternative":[262],"MDSs.":[263,271],"We":[264,290],"provide":[265],"simple":[267],"enumerate":[270],"This":[272],"allows":[274],"explore":[277],"MDSs":[281,285],"find":[284],"optimized":[286],"for":[287],"properties.":[289],"give":[291],"evidence":[292,317],"close":[298],"optimal":[300],"regarding":[301],"property,":[304],"length.":[308],"conjectures":[312],"theoretical":[316],"support":[319],"them":[320],"presented.":[322],"Code":[323],"available":[324],"at":[325],"https://github.com/Kingsford-Group/mdsscope.":[326]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2023-11-22T00:00:00"}
