{"id":"https://openalex.org/W3193164600","doi":"https://doi.org/10.1145/3459930.3469537","title":"A <i>k</i> -mer query tool for assessing population diversity in pangenomes","display_name":"A <i>k</i> -mer query tool for assessing population diversity in pangenomes","publication_year":2021,"publication_date":"2021-07-30","ids":{"openalex":"https://openalex.org/W3193164600","doi":"https://doi.org/10.1145/3459930.3469537","mag":"3193164600"},"language":"en","primary_location":{"id":"doi:10.1145/3459930.3469537","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459930.3469537","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047377288","display_name":"Hang Su","orcid":"https://orcid.org/0000-0003-3030-0509"},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hang Su","raw_affiliation_strings":["University of North Carolina Chapel Hill"],"affiliations":[{"raw_affiliation_string":"University of North Carolina Chapel Hill","institution_ids":["https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100637860","display_name":"Ziwei Chen","orcid":"https://orcid.org/0000-0002-6186-0907"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ziwei Chen","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017454980","display_name":"Maya L. Najarian","orcid":null},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maya L Najarian","raw_affiliation_strings":["University of North Carolina Chapel Hill"],"affiliations":[{"raw_affiliation_string":"University of North Carolina Chapel Hill","institution_ids":["https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044640981","display_name":"Martin T. Ferris","orcid":"https://orcid.org/0000-0003-1241-6268"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Martin T. Ferris","raw_affiliation_strings":["University of North Carolina"],"affiliations":[{"raw_affiliation_string":"University of North Carolina","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018305231","display_name":"Fernando Pardo\u2010Manuel de Villena","orcid":"https://orcid.org/0000-0002-5738-5795"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fernando Pardo-Manuel de Villena","raw_affiliation_strings":["University of North Carolina"],"affiliations":[{"raw_affiliation_string":"University of North Carolina","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042463953","display_name":"Leonard McMillan","orcid":"https://orcid.org/0000-0002-8453-0847"},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leonard McMillan","raw_affiliation_strings":["University of North Carolina Chapel Hill"],"affiliations":[{"raw_affiliation_string":"University of North Carolina Chapel Hill","institution_ids":["https://openalex.org/I114027177"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5047377288"],"corresponding_institution_ids":["https://openalex.org/I114027177"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07593152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10012","display_name":"Genetic diversity and population structure","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10434","display_name":"Chromosomal and Genetic Variations","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1110","display_name":"Plant Science"},"field":{"id":"https://openalex.org/fields/11","display_name":"Agricultural and Biological Sciences"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5518044233322144},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.49396687746047974},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.492213636636734},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4438115656375885},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.43423986434936523},{"id":"https://openalex.org/keywords/row","display_name":"Row","score":0.4123263359069824},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3325446844100952},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.26391351222991943},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2206924557685852},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.13803523778915405},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.12599992752075195},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09738209843635559}],"concepts":[{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5518044233322144},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.49396687746047974},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.492213636636734},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4438115656375885},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.43423986434936523},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.4123263359069824},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3325446844100952},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.26391351222991943},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2206924557685852},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.13803523778915405},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.12599992752075195},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09738209843635559},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3459930.3469537","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459930.3469537","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 12th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7099999785423279}],"awards":[{"id":"https://openalex.org/G3575417915","display_name":null,"funder_award_id":"U24-HG010100","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2007736033","https://openalex.org/W2033409266","https://openalex.org/W2051304374","https://openalex.org/W2051511420","https://openalex.org/W2081784805","https://openalex.org/W2092138236","https://openalex.org/W2103441770","https://openalex.org/W2124985265","https://openalex.org/W2127901812","https://openalex.org/W2131106408","https://openalex.org/W2150550043","https://openalex.org/W2155286781","https://openalex.org/W2158322625","https://openalex.org/W2161488606","https://openalex.org/W2624657184","https://openalex.org/W2759456806","https://openalex.org/W2888300707","https://openalex.org/W2894305622","https://openalex.org/W2950122868","https://openalex.org/W2963158913","https://openalex.org/W2997001777"],"related_works":["https://openalex.org/W4281971614","https://openalex.org/W2051487156","https://openalex.org/W2390933768","https://openalex.org/W2073681303","https://openalex.org/W2467236363","https://openalex.org/W3175260668","https://openalex.org/W2953259538","https://openalex.org/W3122721839","https://openalex.org/W2184455175","https://openalex.org/W2292909562"],"abstract_inverted_index":{"Inexpensive":[0],"and":[1,36,75,100,159,174],"fast":[2],"genome":[3,8],"sequencing":[4],"has":[5],"yielded":[6],"multiple":[7],"assemblies":[9,29],"that,":[10],"taken":[11],"together,":[12],"can":[13],"be":[14],"considered":[15],"as":[16,73,189],"a":[17,31,55,65,171,190],"single":[18],"pangenome":[19,32,57,72,201],"model.":[20,202],"However,":[21],"applying":[22],"conventional":[23],"alignment-based":[24],"sequence":[25,83,120,196],"analysis":[26],"to":[27,54,91,127,135,150,178],"the":[28,47,71,80,96,118,123,128,131,137,145,157,167,182,200],"of":[30,49,68,87,95,122,170],"is":[33,89],"computationally":[34],"expensive":[35],"largely":[37],"redundant.":[38],"Here,":[39],"we":[40],"present":[41],"an":[42,114],"alignment-free":[43],"method":[44],"that":[45,104],"analyzes":[46],"relationship":[48],"any":[50],"new":[51,124],"sample":[52],"relative":[53],"given":[56],"model":[58],"using":[59,113],"selected":[60],"k-mer":[61,109,129,161],"queries.":[62],"We":[63,155],"select":[64],"representative":[66],"set":[67],"k-mers":[69],"from":[70],"probes":[74,88,103,132],"determine":[76],"their":[77,179],"frequencies":[78,110],"in":[79,166,181],"raw":[81,119],"short-read":[82],"data.":[84],"The":[85,108,184],"selection":[86],"designed":[90],"cover":[92],"every":[93],"base":[94],"pangenome,":[97],"maximize":[98,136],"sharing,":[99],"identify":[101],"informative":[102],"discriminate":[105],"between":[106,140],"haplotypes.":[107],"are":[111,133],"determined":[112],"FM-index":[115],"built":[116],"over":[117],"data":[121],"sample.":[125],"Prior":[126],"search,":[130],"reordered":[134],"shared":[138],"suffixes":[139],"succesive":[141],"k-mers,":[142],"thus":[143],"reducing":[144],"overall":[146],"run":[147],"time":[148],"compared":[149],"executing":[151],"each":[152],"search":[153],"independently.":[154],"aggregate":[156],"forward":[158],"reverse":[160],"probe":[162,186],"counts,":[163],"save":[164],"them":[165,176],"appropriate":[168],"rows":[169],"count":[172],"matrix":[173],"remap":[175],"back":[177],"locations":[180],"pangenome.":[183],"resulting":[185],"database":[187],"serves":[188],"valuable":[191],"resource":[192],"for":[193],"representing":[194],"population-scale":[195],"variations":[197],"based":[198],"on":[199]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2021-08-16T00:00:00"}
