{"id":"https://openalex.org/W4386422246","doi":"https://doi.org/10.1186/s13040-023-00342-0","title":"STAR_outliers: a python package that separates univariate outliers from non-normal distributions","display_name":"STAR_outliers: a python package that separates univariate outliers from non-normal distributions","publication_year":2023,"publication_date":"2023-09-04","ids":{"openalex":"https://openalex.org/W4386422246","doi":"https://doi.org/10.1186/s13040-023-00342-0","pmid":"https://pubmed.ncbi.nlm.nih.gov/37667378"},"language":"en","primary_location":{"id":"doi:10.1186/s13040-023-00342-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-023-00342-0","pdf_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-023-00342-0","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-023-00342-0","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012437309","display_name":"John Gregg","orcid":"https://orcid.org/0000-0002-2619-3440"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"John T. Gregg","raw_affiliation_strings":["Department of Biostatistics, Epidemiology and Informatics, University of Pennsylvania, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Biostatistics, Epidemiology and Informatics, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032971510","display_name":"Jason H. Moore","orcid":"https://orcid.org/0000-0002-5015-1099"},"institutions":[{"id":"https://openalex.org/I1282927834","display_name":"Cedars-Sinai Medical Center","ror":"https://ror.org/02pammg90","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1282927834"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jason H. Moore","raw_affiliation_strings":["Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, 90069, USA. jason.moore@csmc.edu","Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, 90069, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, 90069, USA. jason.moore@csmc.edu","institution_ids":[]},{"raw_affiliation_string":"Department of Computational Biomedicine, Cedars-Sinai Medical Center, Los Angeles, CA, 90069, USA","institution_ids":["https://openalex.org/I1282927834"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5032971510"],"corresponding_institution_ids":["https://openalex.org/I1282927834"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":1.0548,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.81605022,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"16","issue":"1","first_page":"25","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11871","display_name":"Advanced Statistical Methods and Models","score":0.9760000109672546,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9613999724388123,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.922152042388916},{"id":"https://openalex.org/keywords/univariate","display_name":"Univariate","score":0.8661526441574097},{"id":"https://openalex.org/keywords/kurtosis","display_name":"Kurtosis","score":0.6414374709129333},{"id":"https://openalex.org/keywords/bimodality","display_name":"Bimodality","score":0.6242331266403198},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.6017524003982544},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5427676439285278},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.4842064380645752},{"id":"https://openalex.org/keywords/robust-statistics","display_name":"Robust statistics","score":0.4601379930973053},{"id":"https://openalex.org/keywords/skewness","display_name":"Skewness","score":0.41574180126190186},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.399075448513031},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3638623058795929},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.36108508706092834},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3568974733352661},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3400786519050598},{"id":"https://openalex.org/keywords/multivariate-statistics","display_name":"Multivariate statistics","score":0.33239275217056274}],"concepts":[{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.922152042388916},{"id":"https://openalex.org/C199163554","wikidata":"https://www.wikidata.org/wiki/Q1681619","display_name":"Univariate","level":3,"score":0.8661526441574097},{"id":"https://openalex.org/C166963901","wikidata":"https://www.wikidata.org/wiki/Q287251","display_name":"Kurtosis","level":2,"score":0.6414374709129333},{"id":"https://openalex.org/C2779287300","wikidata":"https://www.wikidata.org/wiki/Q4913752","display_name":"Bimodality","level":3,"score":0.6242331266403198},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.6017524003982544},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5427676439285278},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.4842064380645752},{"id":"https://openalex.org/C67226441","wikidata":"https://www.wikidata.org/wiki/Q1665389","display_name":"Robust statistics","level":3,"score":0.4601379930973053},{"id":"https://openalex.org/C122342681","wikidata":"https://www.wikidata.org/wiki/Q330828","display_name":"Skewness","level":2,"score":0.41574180126190186},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.399075448513031},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3638623058795929},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.36108508706092834},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3568974733352661},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3400786519050598},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.33239275217056274},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C98444146","wikidata":"https://www.wikidata.org/wiki/Q318","display_name":"Galaxy","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1186/s13040-023-00342-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-023-00342-0","pdf_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-023-00342-0","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},{"id":"pmid:37667378","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37667378","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData mining","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:10476292","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/10476292","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC10476292/pdf/13040_2023_Article_342.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Min","raw_type":"Text"},{"id":"pmh:oai:doaj.org/article:a4370a7967984acc8d30e0d5281d3bb6","is_oa":true,"landing_page_url":"https://doaj.org/article/a4370a7967984acc8d30e0d5281d3bb6","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BioData Mining, Vol 16, Iss 1, Pp 1-15 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s13040-023-00342-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13040-023-00342-0","pdf_url":"https://biodatamining.biomedcentral.com/counter/pdf/10.1186/s13040-023-00342-0","source":{"id":"https://openalex.org/S84409260","display_name":"BioData Mining","issn_l":"1756-0381","issn":["1756-0381"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BioData Mining","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.6299999952316284}],"awards":[{"id":"https://openalex.org/G6682602594","display_name":null,"funder_award_id":"LM010098","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4386422246.pdf"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W171938871","https://openalex.org/W1595972636","https://openalex.org/W1986332411","https://openalex.org/W1995443851","https://openalex.org/W1997817740","https://openalex.org/W2024112601","https://openalex.org/W2062247477","https://openalex.org/W2134857847","https://openalex.org/W2290009291","https://openalex.org/W2995619225","https://openalex.org/W3102617097","https://openalex.org/W3128465814","https://openalex.org/W4234098531","https://openalex.org/W4313565142","https://openalex.org/W6783155939"],"related_works":["https://openalex.org/W2085739372","https://openalex.org/W4225568567","https://openalex.org/W4286378979","https://openalex.org/W1496883226","https://openalex.org/W4297337052","https://openalex.org/W2028605949","https://openalex.org/W2282665605","https://openalex.org/W3216026256","https://openalex.org/W4289406402","https://openalex.org/W4315866209"],"abstract_inverted_index":{"There":[0],"are":[1],"not":[2,195,203],"currently":[3],"any":[4],"univariate":[5,18,57,106,189,198,238,247,273,446],"outlier":[6,95,154,167,179,190,216,230,279,286,389,424,447],"detection":[7,168],"algorithms":[8,21,156,169,254,372],"that":[9,54,76,151,233,311,353,407,439],"transform":[10],"and":[11,28,34,47,72,84,90,144,148,200,267,302,309,318,321,380],"model":[12,22,26,32,212,235,244,337,352],"arbitrarily":[13,109,236,245],"shaped":[14,110,237,246],"distributions":[15,60,100,111,206],"to":[16,124,159,185,242,391,414],"remove":[17,125,160],"outliers.":[19],"Some":[20],"skew,":[23,68],"even":[24],"fewer":[25,268],"kurtosis,":[27,70],"none":[29],"of":[30,51,97,207,364,400,417,445],"them":[31,184],"bimodality":[33],"monotonicity.":[35,73],"To":[36],"overcome":[37],"these":[38,371,420],"challenges,":[39],"we":[40,149,182,249,343],"have":[41,170,250],"implemented":[42,433],"an":[43,215,303,431],"algorithm":[44,232,258,288,306,349,410],"for":[45,188,197,227,316,436],"Skew":[46,138],"Tail-heaviness":[48],"Adjusted":[49],"Removal":[50],"Outliers":[52],"(STAR_outliers)":[53],"robustly":[55],"removes":[56,78,132,262,312,411],"outliers":[58,80,107,161,266,313,438],"from":[59,108,135,162,294,419],"with":[61,81,101,210,299,350],"many":[62,357],"different":[63],"shape":[64],"profiles,":[65],"including":[66],"extreme":[67,69],"bimodality,":[71],"We":[74,368,405],"show":[75,150,406],"STAR_outliers":[77,261,360,409,429],"simulated":[79,264],"greater":[82,102],"recall":[83],"precision":[85],"than":[86,270,422],"several":[87,152,252,271,277],"general":[88],"algorithms,":[89,181],"it":[91],"also":[92,344,369],"models":[93],"the":[94,136,163,333,340,346,374,388,396,401],"bounds":[96],"real":[98],"data":[99,134,385],"accuracy.Background":[103],"Reliably":[104],"removing":[105,355,437],"is":[112,224,430],"a":[113,211,225,228,256,351],"difficult":[114],"task.":[115],"Incorrectly":[116],"assuming":[117],"unimodality":[118],"or":[119],"overestimating":[120],"tail":[121,129,143],"heaviness":[122,130],"fails":[123],"outliers,":[126],"while":[127,314],"underestimating":[128],"incorrectly":[131],"regular":[133],"tails.":[137],"often":[139,157],"produces":[140],"one":[141,145],"heavy":[142],"light":[146,164],"tail,":[147],"sophisticated":[153],"removal":[155,180,231,280,287,425],"fail":[158],"tail.":[165],"Multivariate":[166],"recently":[171],"become":[172],"popular,":[173],"but":[174],"having":[175],"tested":[176],"PyOD's":[177,282],"multivariate":[178],"found":[183],"be":[186,219],"inadequate":[187],"removal.":[191,448],"They":[192],"usually":[193],"do":[194,202],"allow":[196],"input,":[199],"they":[201],"fit":[204,339],"their":[205],"outliership":[208,341,366],"scores":[209],"on":[213,291,373,427],"which":[214],"threshold":[217,390],"can":[218,234],"accurately":[220],"established.":[221],"Thus,":[222],"there":[223],"need":[226],"flexible":[229],"distributions.Results":[239],"In":[240],"order":[241,363],"effectively":[243],"distributions,":[248],"combined":[251],"well-established":[253],"into":[255],"new":[257],"called":[259],"STAR_outliers.":[260],"more":[263],"true":[265],"non-outliers":[269],"other":[272,423],"algorithms.":[274],"These":[275],"include":[276],"normality-assuming":[278],"methods,":[281],"isolation":[283,347],"forest":[284,348],"(IF)":[285],"(ACM":[289],"Transactions":[290],"Knowledge":[292],"Discovery":[293],"Data":[295],"(TKDD)":[296],"6:3,":[297],"2012)":[298],"default":[300,336],"settings,":[301],"IQR":[304],"based":[305],"by":[307,386],"Verardi":[308],"Vermandele":[310],"accounting":[315],"skew":[317],"kurtosis":[319],"(Verardi":[320],"Vermandele,":[322],"Journal":[323],"de":[324,328],"la":[325],"Soci\u00e9t\u00e9":[326],"Fran\u00e7aise":[327],"Statistique":[329],"157:90-114,":[330],"2016).":[331],"Since":[332],"IF":[334],"algorithm's":[335],"poorly":[338],"scores,":[342],"compared":[345,370],"entails":[354],"as":[356,359],"datapoints":[358],"does":[361],"in":[362],"decreasing":[365],"scores.":[367],"publicly":[375],"available":[376],"2018":[377],"National":[378],"Health":[379],"Nutrition":[381],"Examination":[382],"Survey":[383],"(NHANES)":[384],"setting":[387],"keep":[392],"values":[393,418],"falling":[394],"within":[395],"main":[397],"99.3":[398],"percent":[399,416],"fitted":[402],"model's":[403],"domain.":[404],"our":[408],"significantly":[412],"closer":[413],"0.7":[415],"features":[421],"methods":[426,444],"average.Conclusions":[428],"easily":[432],"python":[434],"package":[435],"outperforms":[440],"multiple":[441],"commonly":[442],"used":[443]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
