{"id":"https://openalex.org/W3195592416","doi":"https://doi.org/10.1142/s2424922x21500054","title":"A Novel Hybrid Sampling Algorithm for Solving Class Imbalance Problem in Big Data","display_name":"A Novel Hybrid Sampling Algorithm for Solving Class Imbalance Problem in Big Data","publication_year":2021,"publication_date":"2021-04-01","ids":{"openalex":"https://openalex.org/W3195592416","doi":"https://doi.org/10.1142/s2424922x21500054","mag":"3195592416"},"language":"en","primary_location":{"id":"doi:10.1142/s2424922x21500054","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s2424922x21500054","pdf_url":null,"source":{"id":"https://openalex.org/S4210189353","display_name":"Advances in Data Science and Adaptive Analysis","issn_l":"2424-922X","issn":["2424-922X","2424-9238"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Data Science and Adaptive Analysis","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066668860","display_name":"Khyati Ahlawat","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143260","display_name":"Indira Gandhi Delhi Technical University for Women","ror":"https://ror.org/057c5p638","country_code":"IN","type":"education","lineage":["https://openalex.org/I4210143260"]},{"id":"https://openalex.org/I20791572","display_name":"Indira Gandhi Institute of Technology","ror":"https://ror.org/0010jkx06","country_code":"IN","type":"education","lineage":["https://openalex.org/I20791572"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Khyati Ahlawat","raw_affiliation_strings":["Indira Gandhi Delhi Technical University for Women, Kashmere Gate, Delhi 110006, India"],"affiliations":[{"raw_affiliation_string":"Indira Gandhi Delhi Technical University for Women, Kashmere Gate, Delhi 110006, India","institution_ids":["https://openalex.org/I20791572","https://openalex.org/I4210143260"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064896366","display_name":"Anuradha Chug","orcid":"https://orcid.org/0000-0002-3139-4490"},"institutions":[{"id":"https://openalex.org/I105454292","display_name":"Guru Gobind Singh Indraprastha University","ror":"https://ror.org/034q1za58","country_code":"IN","type":"education","lineage":["https://openalex.org/I105454292"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anuradha Chug","raw_affiliation_strings":["University School of Information, Communication and Technology, Guru Gobind Singh Indraprastha University, Sector 16C, Dwarka, Delhi 110078, India"],"affiliations":[{"raw_affiliation_string":"University School of Information, Communication and Technology, Guru Gobind Singh Indraprastha University, Sector 16C, Dwarka, Delhi 110078, India","institution_ids":["https://openalex.org/I105454292"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008002283","display_name":"Amit Prakash Singh","orcid":"https://orcid.org/0000-0002-8675-6903"},"institutions":[{"id":"https://openalex.org/I105454292","display_name":"Guru Gobind Singh Indraprastha University","ror":"https://ror.org/034q1za58","country_code":"IN","type":"education","lineage":["https://openalex.org/I105454292"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Amit Prakash Singh","raw_affiliation_strings":["University School of Information, Communication and Technology, Guru Gobind Singh Indraprastha University, Sector 16C, Dwarka, Delhi 110078, India"],"affiliations":[{"raw_affiliation_string":"University School of Information, Communication and Technology, Guru Gobind Singh Indraprastha University, Sector 16C, Dwarka, Delhi 110078, India","institution_ids":["https://openalex.org/I105454292"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5066668860"],"corresponding_institution_ids":["https://openalex.org/I20791572","https://openalex.org/I4210143260"],"apc_list":null,"apc_paid":null,"fwci":0.136,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.54513663,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"13","issue":"02","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13429","display_name":"Electricity Theft Detection Techniques","score":0.964900016784668,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.9348999857902527,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.9605083465576172},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7170238494873047},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6732747554779053},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6483660936355591},{"id":"https://openalex.org/keywords/estimation-of-distribution-algorithm","display_name":"Estimation of distribution algorithm","score":0.5722262859344482},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5563291907310486},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5381902456283569},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5380949378013611},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5369067192077637},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5249250531196594},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5148912668228149},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5108108520507812},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4917464554309845},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.43804416060447693},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3583076596260071},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.12876349687576294}],"concepts":[{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.9605083465576172},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7170238494873047},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6732747554779053},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6483660936355591},{"id":"https://openalex.org/C162500139","wikidata":"https://www.wikidata.org/wiki/Q2835887","display_name":"Estimation of distribution algorithm","level":2,"score":0.5722262859344482},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5563291907310486},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5381902456283569},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5380949378013611},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5369067192077637},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5249250531196594},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5148912668228149},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5108108520507812},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4917464554309845},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.43804416060447693},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3583076596260071},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.12876349687576294},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s2424922x21500054","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s2424922x21500054","pdf_url":null,"source":{"id":"https://openalex.org/S4210189353","display_name":"Advances in Data Science and Adaptive Analysis","issn_l":"2424-922X","issn":["2424-922X","2424-9238"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Data Science and Adaptive Analysis","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5899999737739563,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W379212275","https://openalex.org/W619160221","https://openalex.org/W981727710","https://openalex.org/W1137625847","https://openalex.org/W1918839972","https://openalex.org/W1984606274","https://openalex.org/W2010089450","https://openalex.org/W2033110225","https://openalex.org/W2040263621","https://openalex.org/W2072324809","https://openalex.org/W2072750586","https://openalex.org/W2086207029","https://openalex.org/W2126623642","https://openalex.org/W2135074661","https://openalex.org/W2143944844","https://openalex.org/W2145611520","https://openalex.org/W2157954477","https://openalex.org/W2164330572","https://openalex.org/W2171647935","https://openalex.org/W2196145940","https://openalex.org/W2261525379","https://openalex.org/W2289204937","https://openalex.org/W2338318698","https://openalex.org/W2342457036","https://openalex.org/W2432436793","https://openalex.org/W2531607313","https://openalex.org/W2557427388","https://openalex.org/W2585770658","https://openalex.org/W2593875029","https://openalex.org/W2619580871","https://openalex.org/W2626229579","https://openalex.org/W2796186337","https://openalex.org/W2888059818","https://openalex.org/W2949527081","https://openalex.org/W2951930001","https://openalex.org/W2954460690","https://openalex.org/W2963730760","https://openalex.org/W2990580840","https://openalex.org/W4251199158"],"related_works":["https://openalex.org/W2766503024","https://openalex.org/W4206637278","https://openalex.org/W4386005305","https://openalex.org/W3173198409","https://openalex.org/W3082051559","https://openalex.org/W1682621979","https://openalex.org/W2781247653","https://openalex.org/W4220662019","https://openalex.org/W4308419594","https://openalex.org/W2020844573"],"abstract_inverted_index":{"The":[0,23,107,138],"uneven":[1],"distribution":[2,172],"of":[3,11,25,42,69,104,151,170],"classes":[4,71],"in":[5,31,48,75,96,101,153,176],"any":[6,20],"dataset":[7],"poses":[8],"a":[9,89],"tendency":[10],"biasness":[12],"toward":[13],"the":[14,26,53,102,142,156,168,171],"majority":[15],"class":[16,28,64,98],"when":[17],"analyzed":[18],"using":[19,117],"standard":[21],"classifier.":[22],"instances":[24],"significant":[27],"being":[29],"deficient":[30],"numbers":[32],"are":[33,58,144],"generally":[34],"ignored":[35],"and":[36,128,135,173],"their":[37],"correct":[38],"classification":[39,120],"which":[40],"is":[41,45,72,115],"paramount":[43],"interest":[44],"often":[46],"overlooked":[47],"calculating":[49],"overall":[50],"accuracy.":[51],"Therefore,":[52],"conventional":[54],"machine":[55],"learning":[56],"approaches":[57],"rigorously":[59],"refined":[60],"to":[61,80,155],"address":[62],"this":[63],"imbalance":[65,99],"problem.":[66],"This":[67,84,165],"challenge":[68],"imbalanced":[70,177],"more":[73],"prevalent":[74],"big":[76,105,178],"data":[77,179],"scenario":[78],"due":[79],"its":[81],"high":[82],"volume.":[83],"study":[85],"deals":[86],"with":[87,147],"acknowledging":[88],"sampling":[90,112,158],"solution":[91,159],"based":[92,131],"on":[93,132],"cluster":[94],"computing":[95],"handling":[97],"problems":[100],"case":[103],"data.":[106],"newly":[108],"proposed":[109],"approach":[110],"hybrid":[111],"algorithm":[113],"(HSA)":[114],"assessed":[116],"three":[118],"popular":[119],"algorithms":[121],"namely,":[122],"support":[123],"vector":[124],"machine,":[125],"decision":[126],"tree":[127],"k-nearest":[129],"neighbor":[130],"balanced":[133],"accuracy":[134],"elapsed":[136],"time.":[137],"results":[139],"obtained":[140],"from":[141],"experiment":[143],"considered":[145],"promising":[146],"an":[148],"efficiency":[149],"gain":[150],"42%":[152],"comparison":[154],"traditional":[157],"synthetic":[160],"minority":[161],"oversampling":[162],"technique":[163],"(SMOTE).":[164],"work":[166],"proves":[167],"effectiveness":[169],"clustering":[174],"principle":[175],"scenarios.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
