{"id":"https://openalex.org/W4408339736","doi":"https://doi.org/10.1186/s40537-025-01120-x","title":"Unsupervised label generation for severely imbalanced fraud data","display_name":"Unsupervised label generation for severely imbalanced fraud data","publication_year":2025,"publication_date":"2025-03-11","ids":{"openalex":"https://openalex.org/W4408339736","doi":"https://doi.org/10.1186/s40537-025-01120-x"},"language":"en","primary_location":{"id":"doi:10.1186/s40537-025-01120-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01120-x","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01120-x","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01120-x","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011476239","display_name":"Mary Anne Walauskis","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mary Anne Walauskis","raw_affiliation_strings":["College of Engineering and Computer Science, Florida Atlantic University, 777 Glades Road, 33431, Boca Raton, FL, USA"],"affiliations":[{"raw_affiliation_string":"College of Engineering and Computer Science, Florida Atlantic University, 777 Glades Road, 33431, Boca Raton, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089170562","display_name":"Taghi M. Khoshgoftaar","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taghi M. Khoshgoftaar","raw_affiliation_strings":["College of Engineering and Computer Science, Florida Atlantic University, 777 Glades Road, 33431, Boca Raton, FL, USA"],"affiliations":[{"raw_affiliation_string":"College of Engineering and Computer Science, Florida Atlantic University, 777 Glades Road, 33431, Boca Raton, FL, USA","institution_ids":["https://openalex.org/I63772739"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5011476239"],"corresponding_institution_ids":["https://openalex.org/I63772739"],"apc_list":{"value":1060,"currency":"GBP","value_usd":1300},"apc_paid":{"value":1060,"currency":"GBP","value_usd":1300},"fwci":21.8186,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.99235183,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"12","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7849182486534119},{"id":"https://openalex.org/keywords/computational-science-and-engineering","display_name":"Computational Science and Engineering","score":0.6156636476516724},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.41448774933815},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3839137554168701},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37413328886032104}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849182486534119},{"id":"https://openalex.org/C68597687","wikidata":"https://www.wikidata.org/wiki/Q362601","display_name":"Computational Science and Engineering","level":2,"score":0.6156636476516724},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.41448774933815},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3839137554168701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37413328886032104}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1186/s40537-025-01120-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01120-x","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01120-x","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:fe0651ce48794586ab19f8288e75b3c0","is_oa":true,"landing_page_url":"https://doaj.org/article/fe0651ce48794586ab19f8288e75b3c0","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Big Data, Vol 12, Iss 1, Pp 1-23 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1186/s40537-025-01120-x","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s40537-025-01120-x","pdf_url":"https://journalofbigdata.springeropen.com/counter/pdf/10.1186/s40537-025-01120-x","source":{"id":"https://openalex.org/S2737955091","display_name":"Journal Of Big Data","issn_l":"2196-1115","issn":["2196-1115"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Big Data","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408339736.pdf"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W2051627038","https://openalex.org/W2103018059","https://openalex.org/W2296719434","https://openalex.org/W2338318698","https://openalex.org/W2340896621","https://openalex.org/W2564754306","https://openalex.org/W2565047864","https://openalex.org/W2596052762","https://openalex.org/W2683833049","https://openalex.org/W2943758177","https://openalex.org/W2945876440","https://openalex.org/W2999309192","https://openalex.org/W3033548640","https://openalex.org/W3037381232","https://openalex.org/W3108391844","https://openalex.org/W3128837268","https://openalex.org/W3186605977","https://openalex.org/W3206152519","https://openalex.org/W4206735239","https://openalex.org/W4282946014","https://openalex.org/W4306317276","https://openalex.org/W4311543603","https://openalex.org/W4366308790","https://openalex.org/W4388115374","https://openalex.org/W4388229648","https://openalex.org/W4391935956","https://openalex.org/W4402642396"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474"],"abstract_inverted_index":{"Many":[0],"datasets":[1,57],"remain":[2],"unlabeled":[3],"as":[4,34,45,127],"obtaining":[5],"labeled":[6],"data":[7],"for":[8,121,188,228],"machine":[9,25,259],"learning":[10,26,260],"is":[11,28,199],"frequently":[12],"expensive":[13],"and":[14,52,137,210,233,244,247,266],"necessitates":[15],"a":[16,84,106,146,162,182],"high":[17],"level":[18],"of":[19,78,109,118,161,184,193,251,270],"domain":[20],"expertise.":[21],"Another":[22],"challenge":[23,117,250],"facing":[24],"practitioners":[27],"class":[29,39,197,252,273],"imbalance.":[30],"In":[31,169],"domains":[32,265],"such":[33],"fraud":[35],"detection,":[36],"overcoming":[37],"significant":[38],"imbalance":[40],"presents":[41],"an":[42,79,94,132],"additional":[43],"difficulty,":[44],"seen":[46],"in":[47,59,131,138,257,262],"the":[48,68,76,101,116,153,159,167,173,194,223,239,249],"Credit":[49],"Card":[50],"Fraud":[51],"Medicare":[53],"Part":[54],"D":[55],"claims":[56],"used":[58],"this":[60],"work.":[61],"Our":[62,111,215,235],"novel":[63,236],"binary":[64],"labeling":[65,69,108,112,125],"method":[66,82],"automates":[67],"process,":[70],"with":[71,83],"minimal":[72],"expert":[73],"input,":[74],"using":[75,202],"combination":[77],"ensemble":[80],"unsupervised":[81,134],"percentile":[85],"thresholding":[86],"technique.":[87],"The":[88,191],"labels":[89,120,198,246],"are":[90],"further":[91],"refined":[92],"through":[93],"iterative":[95],"minimization":[96],"process":[97],"that":[98],"selects":[99],"only":[100],"highest-confidence":[102],"instances":[103,126],"to":[104,140,165,171,241],"receive":[105],"final":[107],"fraudulent.":[110],"approach":[113,220],"successfully":[114],"overcomes":[115],"generating":[119],"severely":[122],"imbalanced":[123,264],"data,":[124],"fraudulent":[128],"or":[129],"not,":[130],"entirely":[133],"framework.":[135],"Additionally,":[136],"contrast":[139],"conventional":[141],"methods,":[142],"our":[143,219],"methodology":[144,237],"provides":[145],"more":[147,267],"efficient":[148,268],"evaluation":[149,204,269],"by":[150],"directly":[151],"assessing":[152],"generated":[154,196,272],"labels\u2019":[155],"efficacy":[156],"without":[157],"requiring":[158],"training":[160],"supervised":[163],"classifier":[164],"evaluate":[166],"labels.":[168,274],"order":[170],"examine":[172],"effect":[174],"on":[175],"label":[176],"efficacy,":[177],"we":[178],"report":[179],"results":[180,217],"across":[181],"range":[183],"positive":[185,230],"instance":[186,231],"levels":[187,232],"each":[189],"dataset.":[190],"quality":[192],"newly":[195,271],"thoroughly":[200],"assessed":[201],"three":[203],"metrics:":[205],"Jaccard":[206],"Index":[207],"(JI),":[208],"Precision,":[209],"Matthews":[211],"Correlation":[212],"Coefficient":[213],"(MCC).":[214],"empirical":[216],"demonstrate":[218],"consistently":[221],"outperforms":[222],"baseline,":[224],"Isolation":[225],"Forest":[226],"(IF),":[227],"all":[229],"metrics.":[234],"demonstrates":[238],"ability":[240],"provide":[242],"accurate":[243],"robust":[245],"overcome":[248],"imbalance,":[253],"which":[254],"could":[255],"result":[256],"better":[258],"applications":[261],"highly":[263]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
