{"id":"https://openalex.org/W7133322979","doi":"https://doi.org/10.1186/s12859-025-06365-0","title":"DNA sequence contamination analyzer (DNASCAN): a supervised analysis toolkit for detecting and removing DNA contaminants","display_name":"DNA sequence contamination analyzer (DNASCAN): a supervised analysis toolkit for detecting and removing DNA contaminants","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133322979","doi":"https://doi.org/10.1186/s12859-025-06365-0","pmid":"https://pubmed.ncbi.nlm.nih.gov/41776385"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-025-06365-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06365-0","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1186/s12859-025-06365-0","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"John Stephen Malamon","orcid":null},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"John Stephen Malamon","raw_affiliation_strings":["Division of Transplant Surgery, Colorado Center for Transplantation Care, Research and Education (CCTCARE), Aurora, CO, 80045, USA. john.malamon@cuanschutz.edu","Division of Transplant Surgery, Department of Surgery, University of Colorado, Anschutz Medical Campus, 1635 Aurora Court, Aurora, CO, 80045, USA. john.malamon@cuanschutz.edu"],"affiliations":[{"raw_affiliation_string":"Division of Transplant Surgery, Colorado Center for Transplantation Care, Research and Education (CCTCARE), Aurora, CO, 80045, USA. john.malamon@cuanschutz.edu","institution_ids":["https://openalex.org/I51713134"]},{"raw_affiliation_string":"Division of Transplant Surgery, Department of Surgery, University of Colorado, Anschutz Medical Campus, 1635 Aurora Court, Aurora, CO, 80045, USA. john.malamon@cuanschutz.edu","institution_ids":["https://openalex.org/I51713134"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I51713134"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.68726193,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"27","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11970","display_name":"Molecular Biology Techniques and Applications","score":0.19539999961853027,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11970","display_name":"Molecular Biology Techniques and Applications","score":0.19539999961853027,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.17069999873638153,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.11640000343322754,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.6315000057220459},{"id":"https://openalex.org/keywords/contamination","display_name":"Contamination","score":0.6151000261306763},{"id":"https://openalex.org/keywords/dna-sequencing","display_name":"DNA sequencing","score":0.6000000238418579},{"id":"https://openalex.org/keywords/dna","display_name":"DNA","score":0.5027999877929688},{"id":"https://openalex.org/keywords/genomic-dna","display_name":"genomic DNA","score":0.43720000982284546},{"id":"https://openalex.org/keywords/genomics","display_name":"Genomics","score":0.4219000041484833},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.41530001163482666},{"id":"https://openalex.org/keywords/metagenomics","display_name":"Metagenomics","score":0.37779998779296875},{"id":"https://openalex.org/keywords/genome","display_name":"Genome","score":0.36730000376701355}],"concepts":[{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6348000168800354},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.6315000057220459},{"id":"https://openalex.org/C112570922","wikidata":"https://www.wikidata.org/wiki/Q60528603","display_name":"Contamination","level":2,"score":0.6151000261306763},{"id":"https://openalex.org/C51679486","wikidata":"https://www.wikidata.org/wiki/Q380546","display_name":"DNA sequencing","level":3,"score":0.6000000238418579},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.5763000249862671},{"id":"https://openalex.org/C552990157","wikidata":"https://www.wikidata.org/wiki/Q7430","display_name":"DNA","level":2,"score":0.5027999877929688},{"id":"https://openalex.org/C17757408","wikidata":"https://www.wikidata.org/wiki/Q4135516","display_name":"genomic DNA","level":3,"score":0.43720000982284546},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.4219000041484833},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.41530001163482666},{"id":"https://openalex.org/C15151743","wikidata":"https://www.wikidata.org/wiki/Q903778","display_name":"Metagenomics","level":3,"score":0.37779998779296875},{"id":"https://openalex.org/C141231307","wikidata":"https://www.wikidata.org/wiki/Q7020","display_name":"Genome","level":3,"score":0.36730000376701355},{"id":"https://openalex.org/C61053724","wikidata":"https://www.wikidata.org/wiki/Q1154615","display_name":"Sequence analysis","level":3,"score":0.36559998989105225},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.3625999987125397},{"id":"https://openalex.org/C49105822","wikidata":"https://www.wikidata.org/wiki/Q176996","display_name":"Polymerase chain reaction","level":3,"score":0.36230000853538513},{"id":"https://openalex.org/C78063203","wikidata":"https://www.wikidata.org/wiki/Q3062596","display_name":"DNA extraction","level":4,"score":0.36090001463890076},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.3467000126838684},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.3361000120639801},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C135763542","wikidata":"https://www.wikidata.org/wiki/Q106016","display_name":"Genotype","level":3,"score":0.2906999886035919},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C137858568","wikidata":"https://www.wikidata.org/wiki/Q7239","display_name":"Organism","level":2,"score":0.2854999899864197},{"id":"https://openalex.org/C31467283","wikidata":"https://www.wikidata.org/wiki/Q912147","display_name":"Genotyping","level":4,"score":0.27239999175071716},{"id":"https://openalex.org/C40767141","wikidata":"https://www.wikidata.org/wiki/Q285697","display_name":"Recombinant DNA","level":3,"score":0.266400009393692},{"id":"https://openalex.org/C10390740","wikidata":"https://www.wikidata.org/wiki/Q2282401","display_name":"Analyte","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.25760000944137573},{"id":"https://openalex.org/C79386861","wikidata":"https://www.wikidata.org/wiki/Q476697","display_name":"DNA profiling","level":3,"score":0.2547999918460846}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1186/s12859-025-06365-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06365-0","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:41776385","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41776385","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:ff1a697aadfe4779b4b8caeeb6e85d61","is_oa":true,"landing_page_url":"https://doaj.org/article/ff1a697aadfe4779b4b8caeeb6e85d61","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 27, Iss 1 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s12859-025-06365-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-025-06365-0","pdf_url":null,"source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.571125864982605,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W291087796","https://openalex.org/W1520812622","https://openalex.org/W1797580880","https://openalex.org/W1831050183","https://openalex.org/W1970729332","https://openalex.org/W1983675813","https://openalex.org/W1991167302","https://openalex.org/W2003660803","https://openalex.org/W2044863747","https://openalex.org/W2047242127","https://openalex.org/W2051177122","https://openalex.org/W2051769878","https://openalex.org/W2052903163","https://openalex.org/W2058900879","https://openalex.org/W2070060204","https://openalex.org/W2070871769","https://openalex.org/W2098666592","https://openalex.org/W2099328768","https://openalex.org/W2113873264","https://openalex.org/W2129243997","https://openalex.org/W2129275226","https://openalex.org/W2148907915","https://openalex.org/W2164183767","https://openalex.org/W2169474803","https://openalex.org/W2170486072","https://openalex.org/W2243973145","https://openalex.org/W2394897061","https://openalex.org/W2472248166","https://openalex.org/W2575568020","https://openalex.org/W2927168226","https://openalex.org/W2944602870","https://openalex.org/W2945153165","https://openalex.org/W2947110074","https://openalex.org/W2951912016","https://openalex.org/W2952030146","https://openalex.org/W2955160207","https://openalex.org/W2965689387","https://openalex.org/W2980551774","https://openalex.org/W2982209070","https://openalex.org/W2985045444","https://openalex.org/W2985711816","https://openalex.org/W3005521280","https://openalex.org/W3010833257","https://openalex.org/W3028050938","https://openalex.org/W3093528466","https://openalex.org/W3158757007","https://openalex.org/W4206010635","https://openalex.org/W4224436908","https://openalex.org/W4282932758","https://openalex.org/W4309076895","https://openalex.org/W4310855439","https://openalex.org/W4316190287","https://openalex.org/W4323313435","https://openalex.org/W4365479235","https://openalex.org/W4385486963","https://openalex.org/W4386879837","https://openalex.org/W4392242077","https://openalex.org/W4401840812","https://openalex.org/W4402485368","https://openalex.org/W4408967584"],"related_works":[],"abstract_inverted_index":{"DNA":[0,28,50,55,101,120,182],"N-gram":[1,124],"analysis":[2,37],"methodologies":[3],"have":[4],"been":[5],"successfully":[6],"deployed":[7],"to":[8,16,134],"provide":[9,135],"a":[10,17,64,105],"wide":[11],"range":[12],"of":[13,19,32,38,43,49,115,180],"elegant":[14],"solutions":[15],"variety":[18],"complex":[20],"problems":[21],"in":[22,75],"bioinformatics,":[23],"such":[24,88],"as":[25,89],"sequence":[26,51],"alignment,":[27],"barcoding,":[29],"the":[30,36,41,47,112,176],"identification":[31],"functional":[33],"gene":[34,86],"elements,":[35],"microbial":[39],"genomes,":[40],"characterization":[42],"protein":[44],"structures,":[45],"and":[46,59,69,84,107,130,138,150,178,187],"detection":[48,114,133,152],"artifacts.":[52],"Because":[53],"biological":[54,116],"contamination":[56,157],"is":[57],"ubiquitous":[58],"therefore":[60],"unavoidable,":[61],"it":[62,173],"has":[63],"significant":[65],"impact":[66],"on":[67],"genomics":[68],"genetics":[70],"research,":[71,82],"posing":[72],"substantial":[73],"challenges":[74],"population":[76],"genotype":[77],"calling":[78],"quality,":[79],"model":[80],"organism":[81],"proteomics,":[83],"clinical":[85],"therapies":[87],"recombinant":[90],"adeno-associated":[91],"viral":[92],"vector":[93],"preparations.":[94],"To":[95],"this":[96],"end,":[97],"I":[98],"present":[99],"DNASCAN,":[100],"Sequence":[102],"Contamination":[103],"Analyzer,":[104],"scalable":[106],"efficient":[108],"algorithm":[109],"designed":[110],"for":[111,175],"high-resolution":[113],"contaminants":[117],"within":[118],"source":[119],"sequences.":[121,183],"DNASCAN":[122,145,165],"leverages":[123],"analysis,":[125],"supervised":[126],"random":[127],"forest":[128],"classification,":[129],"Bayesian":[131],"change-point":[132],"precise":[136],"breakpoints":[137],"highly":[139],"accurate":[140],"impurity":[141,168],"estimates.":[142],"In":[143],"summary,":[144],"yielded":[146],"100%":[147],"purity":[148],"estimates":[149],"breakpoint":[151],"accuracy":[153],"rates":[154],"at":[155,195],"bacterial":[156],"levels":[158,169],"above":[159,170],"0.1%.":[160],"Using":[161],"long-read":[162],"sequencing":[163],"data,":[164],"detected":[166],"all":[167],"0.025%,":[171],"making":[172],"ideal":[174],"removal":[177],"reconstitution":[179],"contaminated":[181],"The":[184],"software,":[185],"documentation,":[186],"vignettes":[188],"with":[189],"detailed":[190],"code":[191],"demonstrations":[192],"are":[193],"available":[194],"https://github.com/jmal0403/DNASCAN/wiki.":[196]},"counts_by_year":[],"updated_date":"2026-03-25T23:56:10.502304","created_date":"2026-03-04T00:00:00"}
