{"id":"https://openalex.org/W3134770171","doi":"https://doi.org/10.1109/tse.2021.3063727","title":"Data Quality Matters: A Case Study on Data Label Correctness for Security Bug Report Prediction","display_name":"Data Quality Matters: A Case Study on Data Label Correctness for Security Bug Report Prediction","publication_year":2021,"publication_date":"2021-03-05","ids":{"openalex":"https://openalex.org/W3134770171","doi":"https://doi.org/10.1109/tse.2021.3063727","mag":"3134770171"},"language":"en","primary_location":{"id":"doi:10.1109/tse.2021.3063727","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2021.3063727","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035031650","display_name":"Xiaoxue Wu","orcid":"https://orcid.org/0000-0002-7567-3643"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoxue Wu","raw_affiliation_strings":["School of Cyberspace Security, Northwestern Polytechnical University, Xi&#x0027;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Cyberspace Security, Northwestern Polytechnical University, Xi&#x0027;an, Shaanxi, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061098741","display_name":"Wei Zheng","orcid":"https://orcid.org/0000-0001-7969-1630"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zheng","raw_affiliation_strings":["School of Software, Northwestern Polytechnical University, Xi&#x0027;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Northwestern Polytechnical University, Xi&#x0027;an, Shaanxi, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006669765","display_name":"Xin Xia","orcid":"https://orcid.org/0000-0002-6302-3256"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xin Xia","raw_affiliation_strings":["Faculty of Information Technology, Monash University, Melbourne, Victoria, Australia"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Monash University, Melbourne, Victoria, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081036622","display_name":"David Lo","orcid":"https://orcid.org/0000-0002-4367-7201"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"David Lo","raw_affiliation_strings":["School of Information Systems, Singapore Management University, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Information Systems, Singapore Management University, Singapore","institution_ids":["https://openalex.org/I79891267"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5035031650"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":30.4504,"has_fulltext":false,"cited_by_count":123,"citation_normalized_percentile":{"value":0.99744852,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"48","issue":"7","first_page":"2541","last_page":"2556"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.9149196743965149},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8213627934455872},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5307350754737854},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5290559530258179},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5160350799560547},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.4696233570575714},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.45913392305374146},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4149317145347595},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.2007371187210083},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12435862421989441}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.9149196743965149},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8213627934455872},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5307350754737854},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5290559530258179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5160350799560547},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.4696233570575714},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.45913392305374146},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4149317145347595},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2007371187210083},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12435862421989441},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tse.2021.3063727","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2021.3063727","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-8439","is_oa":false,"landing_page_url":"https://ink.library.smu.edu.sg/cgi/viewcontent.cgi?article=8439&amp;amp;context=sis_research","pdf_url":null,"source":{"id":"https://openalex.org/S4377196871","display_name":"Institutional Knowledge (InK) - Institutional Knowledge at Singapore Management University (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/TSE.2021.3063727","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W1827907817","https://openalex.org/W1975879668","https://openalex.org/W1978365593","https://openalex.org/W1987289444","https://openalex.org/W1987843766","https://openalex.org/W2023511777","https://openalex.org/W2065053490","https://openalex.org/W2067490448","https://openalex.org/W2067691588","https://openalex.org/W2097883090","https://openalex.org/W2101726875","https://openalex.org/W2109540106","https://openalex.org/W2112646985","https://openalex.org/W2123493477","https://openalex.org/W2126166995","https://openalex.org/W2126513753","https://openalex.org/W2148143831","https://openalex.org/W2148615889","https://openalex.org/W2154183829","https://openalex.org/W2154398797","https://openalex.org/W2155617465","https://openalex.org/W2156194072","https://openalex.org/W2157353183","https://openalex.org/W2158439356","https://openalex.org/W2159908132","https://openalex.org/W2163837601","https://openalex.org/W2172232422","https://openalex.org/W2200054515","https://openalex.org/W2286236884","https://openalex.org/W2408181256","https://openalex.org/W2535189740","https://openalex.org/W2560436489","https://openalex.org/W2570487166","https://openalex.org/W2578208870","https://openalex.org/W2593675739","https://openalex.org/W2741600166","https://openalex.org/W2757656223","https://openalex.org/W2773017930","https://openalex.org/W2775555297","https://openalex.org/W2781021471","https://openalex.org/W2809303669","https://openalex.org/W2887004133","https://openalex.org/W2888312537","https://openalex.org/W2888320967","https://openalex.org/W2898124301","https://openalex.org/W2900832447","https://openalex.org/W2903799441","https://openalex.org/W2931122162","https://openalex.org/W2946143226","https://openalex.org/W2951710749","https://openalex.org/W2954552517","https://openalex.org/W2963548617","https://openalex.org/W2964279097","https://openalex.org/W2966158888","https://openalex.org/W2967289945","https://openalex.org/W2967556797","https://openalex.org/W2990257819","https://openalex.org/W2999118008","https://openalex.org/W3047958222","https://openalex.org/W3089659633","https://openalex.org/W3099095494","https://openalex.org/W4211116959","https://openalex.org/W4234367654","https://openalex.org/W4237256801","https://openalex.org/W4243379839","https://openalex.org/W4250023757","https://openalex.org/W4301168982","https://openalex.org/W6638594544","https://openalex.org/W6763203725","https://openalex.org/W6784319041"],"related_works":["https://openalex.org/W3008339103","https://openalex.org/W2404647514","https://openalex.org/W1667647204","https://openalex.org/W4247536566","https://openalex.org/W3119814709","https://openalex.org/W2018477250","https://openalex.org/W1508895727","https://openalex.org/W4390939596","https://openalex.org/W4389829534","https://openalex.org/W4317600379"],"abstract_inverted_index":{"In":[0,122],"the":[1,24,28,42,53,64,86,97,116,128,156,164,167,172,179,190,197,205,221,229,234,237,245],"research":[2,117],"of":[3,14,23,30,44,89,93,99,119,131,158,166,199,204],"mining":[4],"software":[5],"repositories,":[6],"we":[7,58,125,142],"need":[8],"to":[9,16,40,85],"label":[10,129,160],"a":[11,18,31,48,60],"large":[12],"amount":[13],"data":[15],"construct":[17],"predictive":[19,49],"model.":[20,50],"The":[21,186,202],"correctness":[22,130,161],"labels":[25],"will":[26],"affect":[27],"performance":[29,88,165,198,203],"model":[32],"substantially.":[33],"However,":[34],"limited":[35],"studies":[36,95],"have":[37],"been":[38],"performed":[39],"investigate":[41],"impact":[43],"mislabeled":[45,81,149],"instances":[46],"on":[47,63,170,220,228],"To":[51],"bridge":[52],"gap,":[54],"in":[55,194,196],"this":[56,123],"article,":[57,124],"perform":[59],"case":[61],"study":[62],"security":[65,246],"bug":[66,139],"report":[67],"(SBR)":[68],"prediction.":[69,121],"We":[70,153],"found":[71],"five":[72,133],"publicly":[73],"available":[74],"datasets":[75,134,159,192,223],"for":[76],"SBR":[77,90,120],"prediction":[78,91],"contains":[79],"many":[80],"instances,":[82],"which":[83,146],"lead":[84],"poor":[87],"models":[92,169,241],"recent":[94],"(e.g.,":[96],"work":[98],"Peters":[100,209,251],"<italic":[101,107,210,216,252,258],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[102,108,211,217,253,259],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">et":[103,109,212,218,254,260],"al.</i>":[104,110,213,219,255,261],"and":[105,141,178,214,256],"Shu":[106,215,257],").":[111],"Furthermore,":[112,232],"it":[113],"might":[114],"mislead":[115],"direction":[118],"first":[126],"improve":[127],"these":[132],"by":[135,162,208,250],"manually":[136],"analyzing":[137],"each":[138],"report,":[140],"find":[143],"749":[144],"SBRs,":[145],"are":[147],"originally":[148],"as":[150],"Non-SBRs":[151],"(NSBRs).":[152],"then":[154],"evaluate":[155],"impacts":[157],"comparing":[163],"classification":[168,200,240],"both":[171],"noisy":[173,230],"(i.e.,":[174,181],"before":[175],"our":[176,183],"correction)":[177,184],"clean":[180,222,235],"after":[182],"datasets.":[185,231],"results":[187],"show":[188],"that":[189],"cleaned":[191],"result":[193],"improvement":[195],"models.":[201],"approaches":[206,248],"proposed":[207],"is":[224],"much":[225],"better":[226],"than":[227],"with":[233],"datasets,":[236],"simple":[238],"text":[239],"could":[242],"significantly":[243],"outperform":[244],"keywords-matrix-based":[247],"applied":[249]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":29},{"year":2023,"cited_by_count":27},{"year":2022,"cited_by_count":47},{"year":2021,"cited_by_count":4}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
