{"id":"https://openalex.org/W3197003309","doi":"https://doi.org/10.1186/s13040-021-00274-7","title":"Evaluation of different approaches for missing data imputation on features associated to genomic data","display_name":"Evaluation of different approaches for missing data imputation on features associated to genomic data","publication_year":2021,"publication_date":"2021-09-03","ids":{"openalex":"https://openalex.org/W3197003309","doi":"https://doi.org/10.1186/s13040-021-00274-7","mag":"3197003309","pmid":"https://pubmed.ncbi.nlm.nih.gov/34479616"},"language":"en","primary_location":{"id":"doi:10.1186/s13040-021-00274-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-021-00274-7","pdf_url":"https://biodatamining.biomedcentral.com/track/pdf/10.1186/s13040-021-00274-7","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://biodatamining.biomedcentral.com/track/pdf/10.1186/s13040-021-00274-7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061614889","display_name":"Ben Omega Petrazzini","orcid":"https://orcid.org/0000-0001-9789-9371"},"institutions":[{"id":"https://openalex.org/I4210156231","display_name":"Institut Pasteur de Montevideo","ror":"https://ror.org/04dpm2z73","country_code":"UY","type":"facility","lineage":["https://openalex.org/I4210156231"]},{"id":"https://openalex.org/I98704320","display_name":"Icahn School of Medicine at Mount Sinai","ror":"https://ror.org/04a9tmd77","country_code":"US","type":"education","lineage":["https://openalex.org/I1320796813","https://openalex.org/I98704320"]}],"countries":["US","UY"],"is_corresponding":true,"raw_author_name":"Ben Omega Petrazzini","raw_affiliation_strings":["Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay","Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, New York, New York, USA","The Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai, New York, New York, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay","institution_ids":["https://openalex.org/I4210156231"]},{"raw_affiliation_string":"Department of Genetics and Genomic Sciences, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]},{"raw_affiliation_string":"The Charles Bronfman Institute for Personalized Medicine, Icahn School of Medicine at Mount Sinai, New York, New York, USA","institution_ids":["https://openalex.org/I98704320"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061239954","display_name":"Hugo Naya","orcid":"https://orcid.org/0000-0001-6982-4399"},"institutions":[{"id":"https://openalex.org/I4210156231","display_name":"Institut Pasteur de Montevideo","ror":"https://ror.org/04dpm2z73","country_code":"UY","type":"facility","lineage":["https://openalex.org/I4210156231"]},{"id":"https://openalex.org/I180910786","display_name":"Universidad de la Rep\u00fablica","ror":"https://ror.org/030bbe882","country_code":"UY","type":"education","lineage":["https://openalex.org/I180910786"]}],"countries":["UY"],"is_corresponding":false,"raw_author_name":"Hugo Naya","raw_affiliation_strings":["Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay","Departamento de Producci\u00f3n Animal y Pasturas, Facultad de Agronom\u00eda, Universidad de la Rep\u00fablica, 12900, Montevideo, Uruguay"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay","institution_ids":["https://openalex.org/I4210156231"]},{"raw_affiliation_string":"Departamento de Producci\u00f3n Animal y Pasturas, Facultad de Agronom\u00eda, Universidad de la Rep\u00fablica, 12900, Montevideo, Uruguay","institution_ids":["https://openalex.org/I180910786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001466772","display_name":"Fernando L\u00f3pez-Bello","orcid":null},"institutions":[{"id":"https://openalex.org/I180910786","display_name":"Universidad de la Rep\u00fablica","ror":"https://ror.org/030bbe882","country_code":"UY","type":"education","lineage":["https://openalex.org/I180910786"]}],"countries":["UY"],"is_corresponding":false,"raw_author_name":"Fernando Lopez-Bello","raw_affiliation_strings":["PEDECIBA Bioinform\u00e1tica, Universidad de la Rep\u00fablica, Montevideo, Uruguay"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PEDECIBA Bioinform\u00e1tica, Universidad de la Rep\u00fablica, Montevideo, Uruguay","institution_ids":["https://openalex.org/I180910786"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009531765","display_name":"Gustavo E. V\u00e1zquez","orcid":"https://orcid.org/0000-0002-5549-7929"},"institutions":[{"id":"https://openalex.org/I875251973","display_name":"Universidad Cat\u00f3lica del Uruguay","ror":"https://ror.org/019xvpc30","country_code":"UY","type":"education","lineage":["https://openalex.org/I875251973"]}],"countries":["UY"],"is_corresponding":false,"raw_author_name":"Gustavo Vazquez","raw_affiliation_strings":["Department of Informatics and Computer Science, Universidad Cat\u00f3lica del Uruguay, Av. 8 de Octubre, 2738, 11600, Montevideo, Uruguay"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Informatics and Computer Science, Universidad Cat\u00f3lica del Uruguay, Av. 8 de Octubre, 2738, 11600, Montevideo, Uruguay","institution_ids":["https://openalex.org/I875251973"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039314715","display_name":"Luc\u00eda Spangenberg","orcid":"https://orcid.org/0000-0001-5124-9250"},"institutions":[{"id":"https://openalex.org/I4210156231","display_name":"Institut Pasteur de Montevideo","ror":"https://ror.org/04dpm2z73","country_code":"UY","type":"facility","lineage":["https://openalex.org/I4210156231"]}],"countries":["UY"],"is_corresponding":false,"raw_author_name":"Luc\u00eda Spangenberg","raw_affiliation_strings":["Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay. lucia@pasteur.edu.uy","Department of Informatics and Computer Science, Universidad Cat\u00f3lica del Uruguay, Av. 8 de Octubre, 2738, 11600, Montevideo, Uruguay. lucia@pasteur.edu.uy","Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay. lucia@pasteur.edu.uy","institution_ids":[]},{"raw_affiliation_string":"Department of Informatics and Computer Science, Universidad Cat\u00f3lica del Uruguay, Av. 8 de Octubre, 2738, 11600, Montevideo, Uruguay. lucia@pasteur.edu.uy","institution_ids":[]},{"raw_affiliation_string":"Bioinformatics Unit, Institut Pasteur de Montevideo, Mataojo 2020, 11400, Montevideo, Uruguay","institution_ids":["https://openalex.org/I4210156231"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5061614889"],"corresponding_institution_ids":["https://openalex.org/I4210156231","https://openalex.org/I98704320"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":6.7805,"has_fulltext":true,"cited_by_count":50,"citation_normalized_percentile":{"value":0.97439608,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"14","issue":"1","first_page":"44","last_page":"44"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.476500004529953,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.476500004529953,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11213","display_name":"Genomic variations and chromosomal abnormalities","score":0.21209999918937683,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.025800000876188278,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.845515251159668},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.7552733421325684},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.6887851357460022},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6849120855331421},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5768523216247559},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.27828383445739746},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2125178873538971}],"concepts":[{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.845515251159668},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.7552733421325684},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.6887851357460022},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6849120855331421},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5768523216247559},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.27828383445739746},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2125178873538971}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13040-021-00274-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-021-00274-7","pdf_url":"https://biodatamining.biomedcentral.com/track/pdf/10.1186/s13040-021-00274-7","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},{"id":"pmid:34479616","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34479616","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData mining","raw_type":null},{"id":"pmh:oai:doaj.org/article:386db40a09e84f86bf7e17a7ddf0eb05","is_oa":true,"landing_page_url":"https://doaj.org/article/386db40a09e84f86bf7e17a7ddf0eb05","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Mining, Vol 14, Iss 1, Pp 1-13 (2021)","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8414708","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8414708","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Min","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s13040-021-00274-7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-021-00274-7","pdf_url":"https://biodatamining.biomedcentral.com/track/pdf/10.1186/s13040-021-00274-7","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","score":0.7099999785423279,"display_name":"Life in Land"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3197003309.pdf","grobid_xml":"https://content.openalex.org/works/W3197003309.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W273955616","https://openalex.org/W1570076217","https://openalex.org/W1580791986","https://openalex.org/W1984068087","https://openalex.org/W2011582941","https://openalex.org/W2016210396","https://openalex.org/W2018189081","https://openalex.org/W2038473742","https://openalex.org/W2044758663","https://openalex.org/W2064186732","https://openalex.org/W2065974896","https://openalex.org/W2076357933","https://openalex.org/W2100358124","https://openalex.org/W2104846587","https://openalex.org/W2106306904","https://openalex.org/W2115098571","https://openalex.org/W2116814040","https://openalex.org/W2127346557","https://openalex.org/W2132949918","https://openalex.org/W2146481951","https://openalex.org/W2148105023","https://openalex.org/W2155845523","https://openalex.org/W2157963336","https://openalex.org/W2161978970","https://openalex.org/W2168470985","https://openalex.org/W2443893970","https://openalex.org/W2793698072","https://openalex.org/W2794373321","https://openalex.org/W2895781865","https://openalex.org/W2905452503","https://openalex.org/W2932881901","https://openalex.org/W3121539865","https://openalex.org/W3197494818","https://openalex.org/W4240988544","https://openalex.org/W4248010344","https://openalex.org/W4256567637"],"related_works":["https://openalex.org/W2181530120","https://openalex.org/W4211215373","https://openalex.org/W2024529227","https://openalex.org/W2055961818","https://openalex.org/W1574575415","https://openalex.org/W3144172081","https://openalex.org/W3179858851","https://openalex.org/W3028371478","https://openalex.org/W2081476516","https://openalex.org/W2581984549"],"abstract_inverted_index":{"BACKGROUND:":[0],"Missing":[1,52,55,60,65],"data":[2,29,42,80,161],"is":[3,155,185],"a":[4,159,191],"common":[5],"issue":[6],"in":[7,75,112],"different":[8,35],"fields,":[9],"such":[10],"as":[11],"electronics,":[12],"image":[13],"processing,":[14],"medical":[15],"records":[16],"and":[17,38,64,74,96,105,130,169],"genomics.":[18],"They":[19,44],"can":[20,32,45],"limit":[21],"or":[22],"even":[23],"bias":[24],"the":[25,71,76,109,113,124,156,172],"posterior":[26],"analysis.":[27],"The":[28],"collection":[30],"process":[31],"lead":[33],"to":[34,150],"distribution,":[36],"frequency,":[37],"structure":[39],"of":[40,78,123],"missing":[41],"points.":[43],"be":[46],"classified":[47],"into":[48],"four":[49],"categories:":[50],"Structurally":[51],"Data":[53],"(SMD),":[54],"Completely":[56],"At":[57,61,67],"Random":[58,62,68,103,167,183],"(MCAR),":[59],"(MAR)":[63],"Not":[66],"(MNAR).":[69],"For":[70],"three":[72],"later,":[73],"context":[77],"genomic":[79,201],"(especially":[81],"non-coding":[82,180],"data),":[83],"we":[84],"will":[85],"discuss":[86],"six":[87],"imputation":[88,121,137,153,174],"approaches":[89],"using":[90],"31,245":[91],"variants":[92],"collected":[93],"from":[94,207],"ClinVar":[95],"annotated":[97],"with":[98],"13":[99],"genome-wide":[100],"features.":[101],"RESULTS:":[102],"Forest":[104,168,184],"kNN":[106,170,189],"algorithms":[107,139],"showed":[108],"best":[110,157,173],"performance":[111],"evaluated":[114],"dataset.":[115],"Additionally,":[116],"some":[117],"features":[118,134],"show":[119,135],"robust":[120],"regardless":[122],"algorithm":[125],"(e.g.":[126,140],"conservation":[127],"scores":[128],"phyloP7":[129],"phyloP20),":[131],"while":[132],"other":[133],"poor":[136],"across":[138],"PhasCons).":[141],"We":[142,164],"also":[143],"developed":[144],"an":[145],"R":[146],"package":[147],"that":[148,166],"helps":[149],"test":[151],"which":[152],"method":[154,175],"for":[158,176],"particular":[160],"set.":[162],"CONCLUSIONS:":[163],"found":[165],"are":[171],"genomics":[177],"data,":[178],"including":[179],"variants.":[181],"Since":[182],"computationally":[186],"more":[187,192],"challenging,":[188],"remains":[190],"realistic":[193],"approach.":[194],"Future":[195],"work":[196],"on":[197],"variant":[198],"prioritization":[199],"thru":[200],"screening":[202],"tests":[203],"could":[204],"largely":[205],"profit":[206],"this":[208],"methodology.":[209]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":12},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":1}],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2025-10-10T00:00:00"}
