{"id":"https://openalex.org/W2294896629","doi":"https://doi.org/10.18293/seke2015-182","title":"Combining Feature Subset Selection and Data Sampling for Coping with Highly Imbalanced Software Data","display_name":"Combining Feature Subset Selection and Data Sampling for Coping with Highly Imbalanced Software Data","publication_year":2015,"publication_date":"2015-07-01","ids":{"openalex":"https://openalex.org/W2294896629","doi":"https://doi.org/10.18293/seke2015-182","mag":"2294896629"},"language":"en","primary_location":{"id":"doi:10.18293/seke2015-182","is_oa":true,"landing_page_url":"https://doi.org/10.18293/seke2015-182","pdf_url":"https://doi.org/10.18293/seke2015-182","source":{"id":"https://openalex.org/S4220650826","display_name":"Proceedings/Proceedings of the ... International Conference on Software Engineering and Knowledge Engineering","issn_l":"2325-9000","issn":["2325-9000","2325-9086"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conferences on Software Engineering and Knowledge Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://doi.org/10.18293/seke2015-182","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109235962","display_name":"Kehan Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I203331390","display_name":"Eastern Connecticut State University","ror":"https://ror.org/01mhgwt57","country_code":"US","type":"education","lineage":["https://openalex.org/I203331390","https://openalex.org/I4210132746"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kehan Gao","raw_affiliation_strings":["Eastern Connecticut State University Willimantic, Connecticut 06226"],"affiliations":[{"raw_affiliation_string":"Eastern Connecticut State University Willimantic, Connecticut 06226","institution_ids":["https://openalex.org/I203331390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089170562","display_name":"Taghi M. Khoshgoftaar","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taghi Khoshgoftaar","raw_affiliation_strings":["Florida Atlantic University Boca Raton, Florida 33431"],"affiliations":[{"raw_affiliation_string":"Florida Atlantic University Boca Raton, Florida 33431","institution_ids":["https://openalex.org/I63772739"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090913753","display_name":"Amri Napolitano","orcid":null},"institutions":[{"id":"https://openalex.org/I63772739","display_name":"Florida Atlantic University","ror":"https://ror.org/05p8w6387","country_code":"US","type":"education","lineage":["https://openalex.org/I63772739"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amri Napolitano","raw_affiliation_strings":["Florida Atlantic University Boca Raton, Florida 33431"],"affiliations":[{"raw_affiliation_string":"Florida Atlantic University Boca Raton, Florida 33431","institution_ids":["https://openalex.org/I63772739"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109235962"],"corresponding_institution_ids":["https://openalex.org/I203331390"],"apc_list":null,"apc_paid":null,"fwci":5.5622,"has_fulltext":true,"cited_by_count":26,"citation_normalized_percentile":{"value":0.95986163,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"2015","issue":null,"first_page":"439","last_page":"444"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7307165861129761},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7068123817443848},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5333449840545654},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5142590403556824},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4452268183231354},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38432708382606506},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36193302273750305}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7307165861129761},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7068123817443848},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5333449840545654},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5142590403556824},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4452268183231354},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38432708382606506},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36193302273750305},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18293/seke2015-182","is_oa":true,"landing_page_url":"https://doi.org/10.18293/seke2015-182","pdf_url":"https://doi.org/10.18293/seke2015-182","source":{"id":"https://openalex.org/S4220650826","display_name":"Proceedings/Proceedings of the ... International Conference on Software Engineering and Knowledge Engineering","issn_l":"2325-9000","issn":["2325-9000","2325-9086"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conferences on Software Engineering and Knowledge Engineering","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18293/seke2015-182","is_oa":true,"landing_page_url":"https://doi.org/10.18293/seke2015-182","pdf_url":"https://doi.org/10.18293/seke2015-182","source":{"id":"https://openalex.org/S4220650826","display_name":"Proceedings/Proceedings of the ... International Conference on Software Engineering and Knowledge Engineering","issn_l":"2325-9000","issn":["2325-9000","2325-9086"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Conferences on Software Engineering and Knowledge Engineering","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2294896629.pdf","grobid_xml":"https://content.openalex.org/works/W2294896629.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W1495061682","https://openalex.org/W1570448133","https://openalex.org/W1965895350","https://openalex.org/W1995806857","https://openalex.org/W2008235343","https://openalex.org/W2012908927","https://openalex.org/W2032751870","https://openalex.org/W2096945460","https://openalex.org/W2109676405","https://openalex.org/W2139749383","https://openalex.org/W2140187489","https://openalex.org/W2142481192","https://openalex.org/W2148143831","https://openalex.org/W2157690157","https://openalex.org/W2313957989","https://openalex.org/W2394622495","https://openalex.org/W2444283935","https://openalex.org/W4293077781","https://openalex.org/W6680806083"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4224009465","https://openalex.org/W4286629047","https://openalex.org/W4306321456","https://openalex.org/W4293525103","https://openalex.org/W2345184372","https://openalex.org/W3200179079","https://openalex.org/W2316780152","https://openalex.org/W3087493185"],"abstract_inverted_index":{"In":[0,59],"the":[1,26,39,42,93,111,136,147,179,192,208,217],"software":[2,30,44,168],"quality":[3,94],"modeling":[4],"process,":[5],"many":[6],"practitioners":[7],"often":[8],"ignore":[9],"problems":[10],"such":[11],"as":[12],"high":[13],"dimensionality":[14],"and":[15,54],"class":[16],"imbalance":[17],"that":[18,92,199],"exist":[19],"in":[20,51,66,178],"data":[21,73,138,149,210],"repositories.":[22],"They":[23],"directly":[24],"use":[25],"available":[27],"set":[28],"of":[29,41,56,84,88,95,115],"metrics":[31],"to":[32,38,48,75,131,143,203],"build":[33],"classification":[34],"models":[35,97],"without":[36],"regard":[37],"condition":[40],"underlying":[43],"measurement":[45],"data,":[46],"leading":[47],"a":[49,82,86,106,166],"decline":[50],"prediction":[52,96],"performance":[53],"extension":[55],"training":[57],"time.":[58],"this":[60],"study,":[61],"we":[62],"propose":[63],"an":[64],"approach,":[65],"which":[67],"feature":[68,132,144,154,182,188,204],"selection":[69,80,180,184,189],"is":[70,81],"combined":[71,190],"with":[72,191],"sampling,":[74],"overcome":[76],"these":[77,125],"problems.":[78],"Feature":[79],"process":[83],"choosing":[85],"subset":[87,183],"relevant":[89],"features":[90],"so":[91],"can":[98],"be":[99,121],"maintained":[100],"or":[101,113],"improved.":[102],"Data":[103],"sampling":[104,200],"seeks":[105],"more":[107],"balanced":[108],"dataset":[109],"through":[110],"addition":[112],"removal":[114],"instances.":[116],"Three":[117],"different":[118],"approaches":[119],"would":[120],"produced":[122],"when":[123],"combing":[124],"two":[126,219],"techniques:":[127],"1-sampling":[128],"performed":[129,141,152,201],"prior":[130,142,202],"selection,":[133,145,205],"but":[134,206],"retaining":[135,146,207],"unsampled":[137,209],"instances;":[139,150],"2-sampling":[140],"sampled":[148],"3-sampling":[151],"after":[153],"selection.":[155],"The":[156,196],"empirical":[157],"study":[158],"was":[159],"carried":[160],"out":[161],"on":[162],"six":[163],"datasets":[164],"from":[165],"real-world":[167],"system.":[169],"We":[170],"employed":[171],"one":[172],"filter-based":[173],"(no":[174],"learning":[175],"algorithm":[176],"involved":[177],"process)":[181],"technique":[185],"called":[186],"correlationbased":[187],"random":[193],"undersampling":[194],"method.":[195],"results":[197],"demonstrate":[198],"instances":[211],"(Approach":[212],"1)":[213],"performs":[214],"better":[215],"than":[216],"other":[218],"approaches.":[220]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
