{"id":"https://openalex.org/W3147499398","doi":"https://doi.org/10.1186/s12859-021-04096-6","title":"Harvestman: a framework for hierarchical feature learning and selection from whole genome sequencing data","display_name":"Harvestman: a framework for hierarchical feature learning and selection from whole genome sequencing data","publication_year":2021,"publication_date":"2021-04-01","ids":{"openalex":"https://openalex.org/W3147499398","doi":"https://doi.org/10.1186/s12859-021-04096-6","mag":"3147499398","pmid":"https://pubmed.ncbi.nlm.nih.gov/33794760"},"language":"en","primary_location":{"id":"doi:10.1186/s12859-021-04096-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-021-04096-6","pdf_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-021-04096-6","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-021-04096-6","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060956530","display_name":"Trevor S. Frisby","orcid":"https://orcid.org/0000-0002-2865-6955"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Trevor S. Frisby","raw_affiliation_strings":["Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030896197","display_name":"Shawn James Baker","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shawn J. Baker","raw_affiliation_strings":["Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009407234","display_name":"Guillaume Mar\u00e7ais","orcid":"https://orcid.org/0000-0002-5083-5925"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guillaume Mar\u00e7ais","raw_affiliation_strings":["Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026666893","display_name":"Quang Minh Hoang","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Quang Minh Hoang","raw_affiliation_strings":["Computer Science Department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113653378","display_name":"Carl Kingsford","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Carl Kingsford","raw_affiliation_strings":["Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA. carlk@cs.cmu.edu","Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA. carlk@cs.cmu.edu","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042353780","display_name":"Christopher J. Langmead","orcid":"https://orcid.org/0000-0001-7521-6736"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Christopher J. Langmead","raw_affiliation_strings":["Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA. cjl@cs.cmu.edu","Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0001-7521-6736","affiliations":[{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA. cjl@cs.cmu.edu","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Computational Biology Department, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5042353780","https://openalex.org/A5113653378"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.1653,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.58141158,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"22","issue":"1","first_page":"174","last_page":"174"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.6514999866485596,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.6514999866485596,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.2125999927520752,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.05849999934434891,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5828931331634521},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.567190408706665},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5425271987915039},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48562970757484436},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.4757130742073059},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4543834924697876},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.41944706439971924},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.12083804607391357}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5828931331634521},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.567190408706665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5425271987915039},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48562970757484436},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.4757130742073059},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4543834924697876},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.41944706439971924},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.12083804607391357},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000073336","descriptor_name":"Whole Genome Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000073336","descriptor_name":"Whole Genome Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000073336","descriptor_name":"Whole Genome Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001943","descriptor_name":"Breast Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D001943","descriptor_name":"Breast Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D001943","descriptor_name":"Breast Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016678","descriptor_name":"Genome","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D023281","descriptor_name":"Genomics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":4,"locations":[{"id":"doi:10.1186/s12859-021-04096-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-021-04096-6","pdf_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-021-04096-6","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:33794760","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33794760","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:doaj.org/article:1fc059f1c09743c3894a51417a32d3cc","is_oa":true,"landing_page_url":"https://doaj.org/article/1fc059f1c09743c3894a51417a32d3cc","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 22, Iss 1, Pp 1-19 (2021)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8017869","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8017869","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s12859-021-04096-6","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s12859-021-04096-6","pdf_url":"https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/s12859-021-04096-6","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"No poverty","id":"https://metadata.un.org/sdg/1","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G1742171491","display_name":null,"funder_award_id":"1548562","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2195582371","display_name":null,"funder_award_id":"P30CA047904","funder_id":"https://openalex.org/F4320337351","funder_display_name":"National Cancer Institute"},{"id":"https://openalex.org/G2206590661","display_name":null,"funder_award_id":"ACI-1548562","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2208054141","display_name":null,"funder_award_id":"P30 CA047904","funder_id":"https://openalex.org/F4320337351","funder_display_name":"National Cancer Institute"},{"id":"https://openalex.org/G2533901300","display_name":null,"funder_award_id":"T32 EB009403","funder_id":"https://openalex.org/F4320306082","funder_display_name":"Howard Hughes Medical Institute"},{"id":"https://openalex.org/G3023850491","display_name":null,"funder_award_id":"R01GM122935","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3355402586","display_name":"Carl Kingsford Investigator Award","funder_award_id":"GBMF4554","funder_id":"https://openalex.org/F4320306202","funder_display_name":"Gordon and Betty Moore Foundation"},{"id":"https://openalex.org/G3549828030","display_name":null,"funder_award_id":"NIH T32","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G367708065","display_name":null,"funder_award_id":"ACI-1548562","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3709464396","display_name":null,"funder_award_id":"-1548562","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3730782400","display_name":"XSEDE 2.0:  Integrating, Enabling and Enhancing National Cyberinfrastructure with Expanding Community Involvement","funder_award_id":"1548562","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4591649018","display_name":null,"funder_award_id":"EB009403","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G5070846967","display_name":null,"funder_award_id":"CURE grant 4100070287","funder_id":"https://openalex.org/F4320308109","funder_display_name":"Pennsylvania Department of Health"},{"id":"https://openalex.org/G5836720286","display_name":null,"funder_award_id":"R01 GM122935","funder_id":"https://openalex.org/F4320337354","funder_display_name":"National Institute of General Medical Sciences"},{"id":"https://openalex.org/G5844536782","display_name":null,"funder_award_id":"P30CA047904","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G6163816949","display_name":null,"funder_award_id":"P30CA047904","funder_id":"https://openalex.org/F4320337363","funder_display_name":"National Institute of Biomedical Imaging and Bioengineering"},{"id":"https://openalex.org/G6561317925","display_name":null,"funder_award_id":"4100070287","funder_id":"https://openalex.org/F4320308109","funder_display_name":"Pennsylvania Department of Health"},{"id":"https://openalex.org/G7306899482","display_name":null,"funder_award_id":"T32 EB009403","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G7397716324","display_name":null,"funder_award_id":"T32 EB009403","funder_id":"https://openalex.org/F4320337363","funder_display_name":"National Institute of Biomedical Imaging and Bioengineering"},{"id":"https://openalex.org/G8907196067","display_name":null,"funder_award_id":"-1548562","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306082","display_name":"Howard Hughes Medical Institute","ror":"https://ror.org/006w34k90"},{"id":"https://openalex.org/F4320306202","display_name":"Gordon and Betty Moore Foundation","ror":"https://ror.org/006wxqw41"},{"id":"https://openalex.org/F4320308109","display_name":"Pennsylvania Department of Health","ror":"https://ror.org/00ra1fg11"},{"id":"https://openalex.org/F4320310207","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337351","display_name":"National Cancer Institute","ror":"https://ror.org/040gcmg81"},{"id":"https://openalex.org/F4320337354","display_name":"National Institute of General Medical Sciences","ror":"https://ror.org/04q48ey07"},{"id":"https://openalex.org/F4320337363","display_name":"National Institute of Biomedical Imaging and Bioengineering","ror":"https://ror.org/00372qc85"},{"id":"https://openalex.org/F4320337968","display_name":"Center for Machine Learning and Health, School of Computer Science, Carnegie Mellon University","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3147499398.pdf","grobid_xml":"https://content.openalex.org/works/W3147499398.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W5594912","https://openalex.org/W60493759","https://openalex.org/W160515281","https://openalex.org/W229800277","https://openalex.org/W1493357981","https://openalex.org/W1495061682","https://openalex.org/W1505191356","https://openalex.org/W1537662695","https://openalex.org/W1545302199","https://openalex.org/W1592785605","https://openalex.org/W1977868384","https://openalex.org/W2029538739","https://openalex.org/W2040884411","https://openalex.org/W2043701535","https://openalex.org/W2070299975","https://openalex.org/W2084193002","https://openalex.org/W2096283457","https://openalex.org/W2096525273","https://openalex.org/W2101234009","https://openalex.org/W2103017472","https://openalex.org/W2104095601","https://openalex.org/W2104549677","https://openalex.org/W2119387367","https://openalex.org/W2161336914","https://openalex.org/W2221443338","https://openalex.org/W2308071406","https://openalex.org/W2326913071","https://openalex.org/W2581109417","https://openalex.org/W2604808360","https://openalex.org/W2623012644","https://openalex.org/W2786582587","https://openalex.org/W2791401633","https://openalex.org/W2794032158","https://openalex.org/W2884681204","https://openalex.org/W2890052597","https://openalex.org/W2898624225","https://openalex.org/W2902815731","https://openalex.org/W2905314196","https://openalex.org/W2923862975","https://openalex.org/W2950246742","https://openalex.org/W3097993951","https://openalex.org/W4238284510","https://openalex.org/W4245543257","https://openalex.org/W4386941397"],"related_works":["https://openalex.org/W1574414179","https://openalex.org/W4362597605","https://openalex.org/W4295122168","https://openalex.org/W3155717344","https://openalex.org/W1770458422","https://openalex.org/W2989932438","https://openalex.org/W4387297750","https://openalex.org/W2186333919","https://openalex.org/W4386564352","https://openalex.org/W2952668426"],"abstract_inverted_index":{"BACKGROUND:":[0],"Supervised":[1],"learning":[2,53,158],"from":[3,118,138,212],"high-throughput":[4],"sequencing":[5],"data":[6,117,137],"presents":[7],"many":[8],"challenges.":[9],"For":[10],"one,":[11],"the":[12,47,78,119,125,157,197,236],"curse":[13],"of":[14,74,84,105,124,130,151,167,196],"dimensionality":[15],"often":[16],"leads":[17],"to":[18,58,87,103,156,173,243],"overfitting":[19],"as":[20,22],"well":[21],"issues":[23],"with":[24],"scalability.":[25],"This":[26],"can":[27],"bring":[28],"about":[29],"inaccurate":[30],"models":[31],"or":[32],"those":[33],"that":[34,71,100,145,153,180],"require":[35],"extensive":[36],"compute":[37],"time":[38],"and":[39,82,94,160,178,188,224,233,252],"resources.":[40],"Additionally,":[41],"variant":[42,213],"calls":[43],"may":[44],"not":[45],"be":[46],"optimal":[48],"encoding":[49,238],"for":[50,208,239],"a":[51,69,148,164,203,218],"given":[52],"task,":[54,159],"which":[55],"also":[56],"contributes":[57],"poor":[59],"predictive":[60],"capabilities.":[61],"To":[62],"address":[63],"these":[64],"issues,":[65],"we":[66,143],"present":[67],"HARVESTMAN,":[68],"method":[70,182],"takes":[72],"advantage":[73],"hierarchical":[75,204,245],"relationships":[76],"among":[77],"possible":[79],"biological":[80],"interpretations":[81],"representations":[83,152],"genomic":[85,222,240],"variants":[86,112,223],"perform":[88],"automatic":[89],"feature":[90,92,175,191,205,246],"learning,":[91],"selection,":[93],"model":[95,210],"building.":[96],"RESULTS:":[97],"We":[98,170],"demonstrate":[99,179],"HARVESTMAN":[101,146,172,201,231,249],"scales":[102],"thousands":[104],"genomes":[106],"comprising":[107],"more":[108,184,255],"than":[109,163],"84":[110],"million":[111],"by":[113],"processing":[114],"phase":[115],"3":[116],"1000":[120],"Genomes":[121],"Project,":[122],"one":[123],"largest":[126],"publicly":[127],"available":[128],"collection":[129],"whole":[131],"genome":[132],"sequences.":[133],"Using":[134],"breast":[135],"cancer":[136],"The":[139],"Cancer":[140],"Genome":[141],"Atlas,":[142],"show":[144],"selects":[147,186,253],"rich":[149],"combination":[150],"are":[154],"adapted":[155],"performs":[161],"better":[162],"binary":[165],"representation":[166],"SNPs":[168],"alone.":[169],"compare":[171],"existing":[174],"selection":[176,206,247],"methods":[177],"our":[181],"is":[183,202,250],"parsimonious-it":[185],"smaller":[187],"less":[189],"redundant":[190],"subsets":[192],"while":[193],"maintaining":[194],"accuracy":[195],"resulting":[198],"classifier.":[199],"CONCLUSION:":[200],"approach":[207],"supervised":[209],"building":[211,217],"call":[214],"data.":[215],"By":[216],"knowledge":[219],"graph":[220],"over":[221],"solving":[225],"an":[226],"integer":[227],"linear":[228],"program":[229],",":[230],"automatically":[232],"optimally":[234],"finds":[235],"right":[237],"variants.":[241],"Compared":[242],"other":[244],"methods,":[248],"faster":[251],"features":[254],"parsimoniously.":[256]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-24T13:16:06.693445","created_date":"2025-10-10T00:00:00"}
