{"id":"https://openalex.org/W3013997935","doi":"https://doi.org/10.1186/s40537-020-00301-0","title":"Investigating class rarity in big data","display_name":"Investigating class rarity in big data","publication_year":2020,"publication_date":"2020-03-16","ids":{"openalex":"https://openalex.org/W3013997935","doi":"https://doi.org/10.1186/s40537-020-00301-0","mag":"3013997935"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-020-00301-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00301-0","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00301-0","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00301-0","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079873660","display_name":"Tawfiq Hasanin","orcid":"https://orcid.org/0000-0003-1072-278X"},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tawfiq Hasanin","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089170562","display_name":"Taghi M. Khoshgoftaar","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taghi M. Khoshgoftaar","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004094853","display_name":"Joffrey L. Leevy","orcid":"https://orcid.org/0000-0002-7079-7540"},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joffrey L. Leevy","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068409386","display_name":"Richard A. Bauder","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard A. Bauder","raw_affiliation_strings":["Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA"],"affiliations":[{"raw_affiliation_string":"Florida Atlantic University, 777 Glades Road, Boca Raton, 33431, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5079873660"],"corresponding_institution_ids":["https://openalex.org/I63772739"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":2.4465,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.91091648,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"7","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/receiver-operating-characteristic","display_name":"Receiver operating characteristic","score":0.792809247970581},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6840375661849976},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6650592088699341},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.6484407186508179},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.6079005002975464},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5888962745666504},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.5590062737464905},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5272416472434998},{"id":"https://openalex.org/keywords/denial-of-service-attack","display_name":"Denial-of-service attack","score":0.45655885338783264},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.4467844069004059},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.4293704926967621},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.3262513279914856},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.26370102167129517},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09952634572982788}],"concepts":[{"id":"https://openalex.org/C58471807","wikidata":"https://www.wikidata.org/wiki/Q327120","display_name":"Receiver operating characteristic","level":2,"score":0.792809247970581},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6840375661849976},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6650592088699341},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.6484407186508179},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.6079005002975464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5888962745666504},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.5590062737464905},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5272416472434998},{"id":"https://openalex.org/C38822068","wikidata":"https://www.wikidata.org/wiki/Q131406","display_name":"Denial-of-service attack","level":3,"score":0.45655885338783264},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.4467844069004059},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.4293704926967621},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3262513279914856},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26370102167129517},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09952634572982788},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s40537-020-00301-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00301-0","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00301-0","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:8d3902a1a33d48b4a2eba30a572b826e","is_oa":true,"landing_page_url":"https://doaj.org/article/8d3902a1a33d48b4a2eba30a572b826e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 7, Iss 1, Pp 1-17 (2020)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-020-00301-0","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-020-00301-0","pdf_url":"https://journalofbigdata.springeropen.com/track/pdf/10.1186/s40537-020-00301-0","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.49000000953674316}],"awards":[{"id":"https://openalex.org/G1444267946","display_name":null,"funder_award_id":"NS-1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5416735113","display_name":"MRI: Acquisition of Big Data Training and Research Laboratory","funder_award_id":"1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6293531692","display_name":null,"funder_award_id":"CNS-1427536","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310801","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387"},{"id":"https://openalex.org/F4320317380","display_name":"Universidad del Atl\u00e1ntico","ror":"https://ror.org/05mm1w714"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3013997935.pdf","grobid_xml":"https://content.openalex.org/works/W3013997935.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W1491542233","https://openalex.org/W1497131193","https://openalex.org/W1570448133","https://openalex.org/W1598064945","https://openalex.org/W1680392829","https://openalex.org/W1966716734","https://openalex.org/W1967282477","https://openalex.org/W1974079881","https://openalex.org/W2018653639","https://openalex.org/W2023938061","https://openalex.org/W2058731966","https://openalex.org/W2088535455","https://openalex.org/W2095178814","https://openalex.org/W2101754595","https://openalex.org/W2105947650","https://openalex.org/W2121784101","https://openalex.org/W2157941646","https://openalex.org/W2161544295","https://openalex.org/W2171278097","https://openalex.org/W2171647935","https://openalex.org/W2189465200","https://openalex.org/W2312301617","https://openalex.org/W2318802957","https://openalex.org/W2500756573","https://openalex.org/W2531563875","https://openalex.org/W2539569989","https://openalex.org/W2557427388","https://openalex.org/W2562085068","https://openalex.org/W2784153182","https://openalex.org/W2887974490","https://openalex.org/W2892312135","https://openalex.org/W2899434936","https://openalex.org/W2903701969","https://openalex.org/W2909961873","https://openalex.org/W2913298375","https://openalex.org/W2934340240","https://openalex.org/W2947070947","https://openalex.org/W2962200422","https://openalex.org/W2963288913","https://openalex.org/W2963891150","https://openalex.org/W2995564009","https://openalex.org/W4285719527","https://openalex.org/W6614148910"],"related_works":["https://openalex.org/W2115282466","https://openalex.org/W2257953284","https://openalex.org/W3047552631","https://openalex.org/W3099386970","https://openalex.org/W4367596031","https://openalex.org/W2799952019","https://openalex.org/W305613802","https://openalex.org/W3159096857","https://openalex.org/W2011548984","https://openalex.org/W2070722997"],"abstract_inverted_index":{"Abstract":[0],"In":[1,154,205],"Machine":[2,39],"Learning":[3,40],",":[4,99,105],"if":[5],"one":[6],"class":[7,25,31,111,123,145,246],"has":[8],"a":[9,54,75,118,161,212],"significantly":[10],"larger":[11],"number":[12],"of":[13,38,78,122,141,218,244],"instances":[14,135],"(majority)":[15],"than":[16,57],"the":[17,35,43,94,102,155,169,174,177,184,192,195,206,225,228,239,249,254,259,262,270,281,287],"other":[18],"(minority),":[19],"this":[20,60],"condition":[21],"is":[22],"defined":[23],"as":[24,183,269],"imbalance.":[26],"With":[27],"regard":[28,276],"to":[29,64,109,136,277],"datasets,":[30],"imbalance":[32,61],"can":[33],"bias":[34],"predictive":[36],"capabilities":[37],"algorithms":[41],"towards":[42],"majority":[44],"(negative)":[45],"class,":[46],"and":[47,87,198,265],"in":[48,113,127],"situations":[49],"where":[50],"false":[51,58],"negatives":[52],"incur":[53],"greater":[55],"penalty":[56],"positives,":[59],"may":[62],"lead":[63],"adverse":[65],"consequences.":[66],"Our":[67],"paper":[68],"incorporates":[69],"two":[70],"case":[71,157,208,279],"studies,":[72,280],"each":[73],"utilizing":[74],"unique":[76],"approach":[77],"three":[79,88],"learners":[80,170,255],"(gradient-boosted":[81],"trees,":[82],"logistic":[83],"regression,":[84],"random":[85],"forest)":[86],"performance":[89,166],"metrics":[90,201,268],"(":[91],"Area":[92,100,175,193,226,260],"Under":[93,101,176,194,227,261],"Receiver":[95,178,229],"Operating":[96,179,230],"Characteristic":[97,180,231],"Curve":[98,104,181,197,232,264],"Precision-Recall":[103,196,263],"Geometric":[106,199,266],"Mean":[107,200,267],")":[108],"investigate":[110],"rarity":[112,185,271],"big":[114],"data.":[115],"Class":[116],"rarity,":[117],"notably":[119],"extreme":[120],"degree":[121],"imbalance,":[124],"was":[125],"effected":[126],"our":[128],"experiments":[129],"by":[130],"randomly":[131],"removing":[132],"minority":[133],"(positive)":[134],"artificially":[137],"generate":[138],"eight":[139],"subsets":[140,243],"gradually":[142],"decreasing":[143],"positive":[144,245],"instances.":[146,247],"All":[147],"model":[148],"evaluations":[149],"were":[150],"performed":[151],"through":[152],"Cross-Validation.":[153],"first":[156],"study,":[158,209,251],"which":[159,210],"uses":[160,211],"Medicare":[162],"Part":[163],"B":[164],"dataset,":[165],"scores":[167,190,237,252],"for":[168,238,253],"generally":[171,256],"improve":[172,257],"with":[173,191,241,258,275],"metric":[182,233],"level":[186,272],"decreases,":[187],"while":[188],"corresponding":[189],"show":[202],"no":[203],"improvement.":[204],"second":[207,250],"dataset":[213],"built":[214],"from":[215],"Distributed":[216],"Denial":[217],"Service":[219],"attack":[220,221],"data":[222],"(POSTSlowloris":[223],"Combined),":[224],"produces":[234],"very":[235],"high-performance":[236],"learners,":[240],"all":[242],"For":[248],"decreases.":[273],"Overall,":[274],"both":[278],"Gradient-Boosted":[282],"Trees":[283],"(GBT)":[284],"learner":[285],"performs":[286],"best.":[288]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":4}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
