{"id":"https://openalex.org/W2795013017","doi":"https://doi.org/10.1145/3180155.3180197","title":"Is \"better data\" better than \"better data miners\"?","display_name":"Is \"better data\" better than \"better data miners\"?","publication_year":2018,"publication_date":"2018-05-27","ids":{"openalex":"https://openalex.org/W2795013017","doi":"https://doi.org/10.1145/3180155.3180197","mag":"2795013017"},"language":"en","primary_location":{"id":"doi:10.1145/3180155.3180197","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3180155.3180197","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 40th International Conference on Software Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004481316","display_name":"Amritanshu Agrawal","orcid":"https://orcid.org/0000-0002-1220-8533"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amritanshu Agrawal","raw_affiliation_strings":["North Carolina State University"],"affiliations":[{"raw_affiliation_string":"North Carolina State University","institution_ids":["https://openalex.org/I137902535"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077008083","display_name":"Tim Menzies","orcid":"https://orcid.org/0000-0002-5040-3196"},"institutions":[{"id":"https://openalex.org/I137902535","display_name":"North Carolina State University","ror":"https://ror.org/04tj63d06","country_code":"US","type":"education","lineage":["https://openalex.org/I137902535"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tim Menzies","raw_affiliation_strings":["North Carolina State University"],"affiliations":[{"raw_affiliation_string":"North Carolina State University","institution_ids":["https://openalex.org/I137902535"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5004481316"],"corresponding_institution_ids":["https://openalex.org/I137902535"],"apc_list":null,"apc_paid":null,"fwci":34.134,"has_fulltext":false,"cited_by_count":159,"citation_normalized_percentile":{"value":0.99652866,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1050","last_page":"1061"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7815110683441162},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.5927379727363586},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5915979146957397},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5911890864372253},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5858123898506165},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5406084060668945},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.5279404520988464},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5088565349578857},{"id":"https://openalex.org/keywords/source-lines-of-code","display_name":"Source lines of code","score":0.448763370513916},{"id":"https://openalex.org/keywords/software-bug","display_name":"Software bug","score":0.4319559931755066},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.4111725389957428},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.16001001000404358}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7815110683441162},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.5927379727363586},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5915979146957397},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5911890864372253},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5858123898506165},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5406084060668945},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.5279404520988464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5088565349578857},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.448763370513916},{"id":"https://openalex.org/C1009929","wikidata":"https://www.wikidata.org/wiki/Q179550","display_name":"Software bug","level":3,"score":0.4319559931755066},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.4111725389957428},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.16001001000404358},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3180155.3180197","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3180155.3180197","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 40th International Conference on Software Engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":87,"referenced_works":["https://openalex.org/W150336312","https://openalex.org/W998146211","https://openalex.org/W1535128201","https://openalex.org/W1582810276","https://openalex.org/W1595159159","https://openalex.org/W1708394971","https://openalex.org/W1964062576","https://openalex.org/W1966180580","https://openalex.org/W1975620021","https://openalex.org/W1990748933","https://openalex.org/W1994493193","https://openalex.org/W2005504865","https://openalex.org/W2008253679","https://openalex.org/W2008596407","https://openalex.org/W2017439596","https://openalex.org/W2019338079","https://openalex.org/W2038464048","https://openalex.org/W2057780988","https://openalex.org/W2073767361","https://openalex.org/W2082314767","https://openalex.org/W2084085041","https://openalex.org/W2086095861","https://openalex.org/W2094764356","https://openalex.org/W2100034782","https://openalex.org/W2101234009","https://openalex.org/W2103715428","https://openalex.org/W2104882279","https://openalex.org/W2105300539","https://openalex.org/W2105776892","https://openalex.org/W2108664164","https://openalex.org/W2111421634","https://openalex.org/W2118283821","https://openalex.org/W2118978333","https://openalex.org/W2119168155","https://openalex.org/W2120457925","https://openalex.org/W2125999269","https://openalex.org/W2126957654","https://openalex.org/W2130883460","https://openalex.org/W2135198476","https://openalex.org/W2136706100","https://openalex.org/W2137235241","https://openalex.org/W2137267962","https://openalex.org/W2137955320","https://openalex.org/W2138428785","https://openalex.org/W2139749383","https://openalex.org/W2143085507","https://openalex.org/W2143637886","https://openalex.org/W2148143831","https://openalex.org/W2151666086","https://openalex.org/W2158744032","https://openalex.org/W2158864412","https://openalex.org/W2160958420","https://openalex.org/W2160988203","https://openalex.org/W2163732854","https://openalex.org/W2163837601","https://openalex.org/W2164262889","https://openalex.org/W2165533158","https://openalex.org/W2171824022","https://openalex.org/W2172232422","https://openalex.org/W2338785498","https://openalex.org/W2344072768","https://openalex.org/W2367798545","https://openalex.org/W2508870277","https://openalex.org/W2582827793","https://openalex.org/W2585284977","https://openalex.org/W2593492524","https://openalex.org/W2593675739","https://openalex.org/W2594132308","https://openalex.org/W2627491027","https://openalex.org/W2737319015","https://openalex.org/W2797563284","https://openalex.org/W2799061466","https://openalex.org/W2799148064","https://openalex.org/W2952812854","https://openalex.org/W2952845057","https://openalex.org/W2997591727","https://openalex.org/W3099095494","https://openalex.org/W3105203384","https://openalex.org/W3141989311","https://openalex.org/W4236586490","https://openalex.org/W4245807786","https://openalex.org/W4250023757","https://openalex.org/W4252675558","https://openalex.org/W4293684564","https://openalex.org/W4293860453","https://openalex.org/W4310288542","https://openalex.org/W4310299640"],"related_works":["https://openalex.org/W1992074508","https://openalex.org/W2086523506","https://openalex.org/W3004570974","https://openalex.org/W4401359429","https://openalex.org/W4381198270","https://openalex.org/W4288080276","https://openalex.org/W3105287819","https://openalex.org/W4283697402","https://openalex.org/W2791411508","https://openalex.org/W2186856397"],"abstract_inverted_index":{"We":[0],"report":[1],"and":[2,27,103,107,135,173],"fix":[3],"an":[4],"important":[5,161],"systematic":[6],"error":[7],"in":[8,35,74,80],"prior":[9],"studies":[10,18,167],"that":[11],"ranked":[12],"classifiers":[13,23],"for":[14,85,120,148,180],"software":[15,75,149],"analytics.":[16],"Those":[17],"did":[19,29],"not":[20,30],"(a)":[21,45],"assess":[22],"on":[24],"multiple":[25],"criteria":[26],"they":[28],"(b)":[31,49],"study":[32,84],"how":[33],"variations":[34],"the":[36,39,51,55,115,140],"data":[37,57,156],"affect":[38],"results.":[40],"Hence,":[41],"this":[42],"paper":[43],"applies":[44],"multi-criteria":[46],"tests":[47],"while":[48],"fixing":[50],"weaker":[52],"regions":[53],"of":[54,65,94,114,124,133],"training":[56],"(using":[58],"SMOTUNED,":[59],"which":[60],"is":[61,176],"a":[62,81,91,130,177],"self-tuning":[63],"version":[64],"SMOTE).":[66],"This":[67],"approach":[68],"leads":[69],"to":[70,118],"dramatically":[71],"large":[72],"increases":[73],"defect":[76,153],"predictions.":[77],"When":[78],"applied":[79],"5*5":[82],"cross-validation":[83],"3,681":[86],"JAVA":[87],"classes":[88],"(containing":[89],"over":[90],"million":[92],"lines":[93],"code)":[95],"from":[96],"open":[97],"source":[98],"systems,":[99],"SMOTUNED":[100,136,175],"increased":[101],"AUC":[102],"recall":[104],"by":[105],"60%":[106],"20%":[108],"respectively.":[109],"These":[110],"improvements":[111],"are":[112,168],"independent":[113],"classifier":[116,163],"used":[117],"predict":[119],"quality.":[121],"Same":[122],"kind":[123],"pattern":[125],"(improvement)":[126],"was":[127,137],"observed":[128],"when":[129],"comparative":[131],"analysis":[132],"SMOTE":[134],"done":[138],"against":[139],"most":[141],"recent":[142],"class":[143],"imbalance":[144],"technique.":[145],"In":[146],"conclusion,":[147],"analytic":[150],"tasks":[151],"like":[152],"prediction,":[154],"(1)":[155],"pre-processing":[157],"can":[158],"be":[159],"more":[160],"than":[162],"choice,":[164],"(2)":[165],"ranking":[166],"incomplete":[169],"without":[170],"such":[171],"pre-processing,":[172],"(3)":[174],"promising":[178],"candidate":[179],"pre-processing.":[181]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":33},{"year":2021,"cited_by_count":35},{"year":2020,"cited_by_count":25},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":11},{"year":2017,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2018-04-06T00:00:00"}
