{"id":"https://openalex.org/W4395702168","doi":"https://doi.org/10.1145/3603287.3651191","title":"Prediction Performance Analysis for ML Models Based on Impacts of Data Imbalance and Bias","display_name":"Prediction Performance Analysis for ML Models Based on Impacts of Data Imbalance and Bias","publication_year":2024,"publication_date":"2024-04-18","ids":{"openalex":"https://openalex.org/W4395702168","doi":"https://doi.org/10.1145/3603287.3651191"},"language":"en","primary_location":{"id":"doi:10.1145/3603287.3651191","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3603287.3651191","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM Southeast Conference on ZZZ","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034723191","display_name":"Chunlan Gao","orcid":"https://orcid.org/0009-0003-1929-4037"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chunlan Gao","raw_affiliation_strings":["Georgia State University, Atlanta, Georgia, USA"],"raw_orcid":"https://orcid.org/0009-0003-1929-4037","affiliations":[{"raw_affiliation_string":"Georgia State University, Atlanta, Georgia, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004590845","display_name":"Yong Shi","orcid":"https://orcid.org/0000-0002-3980-1425"},"institutions":[{"id":"https://openalex.org/I172980758","display_name":"Kennesaw State University","ror":"https://ror.org/00jeqjx33","country_code":"US","type":"education","lineage":["https://openalex.org/I172980758"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yong Shi","raw_affiliation_strings":["Kennesaw State University, Marietta, Georgia, USA"],"raw_orcid":"https://orcid.org/0000-0002-3980-1425","affiliations":[{"raw_affiliation_string":"Kennesaw State University, Marietta, Georgia, USA","institution_ids":["https://openalex.org/I172980758"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034723191"],"corresponding_institution_ids":["https://openalex.org/I181565077"],"apc_list":null,"apc_paid":null,"fwci":0.6623,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.72670508,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"235","last_page":"240"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11653","display_name":"Financial Distress and Bankruptcy Prediction","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1402","display_name":"Accounting"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.968999981880188,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6309472322463989},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4565022885799408},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.34738534688949585},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.3313907980918884},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13314077258110046}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6309472322463989},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4565022885799408},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.34738534688949585},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3313907980918884},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13314077258110046},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3603287.3651191","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3603287.3651191","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 ACM Southeast Conference on ZZZ","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/13","display_name":"Climate action"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1546047689","https://openalex.org/W1982120517","https://openalex.org/W1988790447","https://openalex.org/W2023450550","https://openalex.org/W2040181375","https://openalex.org/W2040693730","https://openalex.org/W2148143831","https://openalex.org/W2604243156","https://openalex.org/W2906988215","https://openalex.org/W2936503027","https://openalex.org/W2964093981","https://openalex.org/W2997591727","https://openalex.org/W3015997807","https://openalex.org/W3144804712","https://openalex.org/W3173198409","https://openalex.org/W3176376866","https://openalex.org/W4206335741","https://openalex.org/W6907693833"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"An":[0],"imbalanced":[1,39,78,96],"dataset":[2,126],"is":[3,44,134,139,149],"characterized":[4],"by":[5,38],"a":[6,45,66,99],"substantial":[7],"disparity":[8],"in":[9,41,76],"the":[10,28,35,88,106,117,125,129,136,140,144,169],"distribution":[11],"of":[12,27,69],"examples":[13],"among":[14],"its":[15],"classes,":[16],"with":[17],"one":[18],"class":[19],"containing":[20],"significantly":[21],"more":[22],"instances":[23],"than":[24,166],"others.":[25],"Most":[26],"credit":[29,82],"fraud":[30],"datasets":[31,40],"are":[32,163],"imbalanced.":[33],"Addressing":[34],"challenges":[36],"posed":[37],"classification":[42,50,107],"problems":[43],"complex":[46],"task,":[47],"as":[48],"many":[49],"algorithms":[51],"struggle":[52],"to":[53,72,81,94,124],"provide":[54],"satisfactory":[55],"performance":[56,75],"under":[57],"such":[58],"conditions.":[59],"In":[60,151],"this":[61],"article,":[62],"we":[63,86,104],"have":[64],"conducted":[65],"comparative":[67],"analysis":[68],"various":[70],"classifiers":[71],"assess":[73],"their":[74],"handling":[77],"data":[79,97],"related":[80],"card":[83],"fraud.":[84],"Then,":[85],"employed":[87],"Synthetic":[89],"Minority":[90],"Oversampling":[91],"Technique":[92],"(SMOTE)":[93],"synthesize":[95],"into":[98],"relatively":[100],"balanced":[101],"dataset.":[102],"Subsequently,":[103],"reevaluated":[105],"results":[108],"using":[109],"different":[110],"classifiers.":[111],"Ultimately,":[112],"our":[113],"findings":[114],"revealed":[115],"that":[116],"Naive":[118],"Bayes":[119],"classifier":[120,138],"was":[121],"less":[122],"sensitive":[123,142],"imbalance,":[127],"which":[128,162],"AUC":[130,145,160],"score":[131,146],"increase":[132,147],"rate":[133,148],"40.19%,":[135],"KNN":[137],"most":[141],"one,":[143],"61.27%.":[150],"all,":[152],"AdaBoost":[153],"and":[154],"Random":[155],"Forest":[156],"perform":[157],"much":[158],"higher":[159,165],"score,":[161],"both":[164],"95%":[167],"after":[168],"SMOTE.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
