{"id":"https://openalex.org/W2964279097","doi":"https://doi.org/10.1109/tse.2019.2929761","title":"The Impact of Mislabeled Changes by SZZ on Just-in-Time Defect Prediction","display_name":"The Impact of Mislabeled Changes by SZZ on Just-in-Time Defect Prediction","publication_year":2019,"publication_date":"2019-07-18","ids":{"openalex":"https://openalex.org/W2964279097","doi":"https://doi.org/10.1109/tse.2019.2929761","mag":"2964279097"},"language":"en","primary_location":{"id":"doi:10.1109/tse.2019.2929761","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2019.2929761","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ink.library.smu.edu.sg/sis_research/4494","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043400199","display_name":"Yuanrui Fan","orcid":"https://orcid.org/0000-0001-5234-0152"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuanrui Fan","raw_affiliation_strings":["College of Computer Science and Technology, Ningbo Research Institute, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Ningbo Research Institute, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006669765","display_name":"Xin Xia","orcid":"https://orcid.org/0000-0002-6302-3256"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xin Xia","raw_affiliation_strings":["Faculty of Information Technology, Monash University, Melbourne, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Monash University, Melbourne, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052196896","display_name":"Daniel Alencar da Costa","orcid":"https://orcid.org/0000-0003-4525-3266"},"institutions":[{"id":"https://openalex.org/I80281795","display_name":"University of Otago","ror":"https://ror.org/01jmxt844","country_code":"NZ","type":"education","lineage":["https://openalex.org/I80281795"]}],"countries":["NZ"],"is_corresponding":false,"raw_author_name":"Daniel Alencar da Costa","raw_affiliation_strings":["Information Science Department, University of Otago, Dunedin, New Zealand"],"affiliations":[{"raw_affiliation_string":"Information Science Department, University of Otago, Dunedin, New Zealand","institution_ids":["https://openalex.org/I80281795"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081036622","display_name":"David Lo","orcid":"https://orcid.org/0000-0002-4367-7201"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"David Lo","raw_affiliation_strings":["School of Information Systems, Singapore Management University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Information Systems, Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091586373","display_name":"Ahmed E. Hassan","orcid":"https://orcid.org/0000-0001-7749-5513"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ahmed E. Hassan","raw_affiliation_strings":["School of Computing, Queen's University, Kingston, ON, Canada"],"affiliations":[{"raw_affiliation_string":"School of Computing, Queen's University, Kingston, ON, Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114429788","display_name":"Shanping Li","orcid":"https://orcid.org/0000-0003-2615-9792"},"institutions":[{"id":"https://openalex.org/I109935558","display_name":"Ningbo University","ror":"https://ror.org/03et85d35","country_code":"CN","type":"education","lineage":["https://openalex.org/I109935558"]},{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shanping Li","raw_affiliation_strings":["College of Computer Science and Technology, Ningbo Research Institute, Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Ningbo Research Institute, Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":5,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5043400199"],"corresponding_institution_ids":["https://openalex.org/I109935558","https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":16.2008,"has_fulltext":false,"cited_by_count":96,"citation_normalized_percentile":{"value":0.99031838,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"47","issue":"8","first_page":"1559","last_page":"1586"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7353182435035706},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.666861891746521},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.6530054807662964},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5168624520301819},{"id":"https://openalex.org/keywords/performance-metric","display_name":"Performance metric","score":0.5027248859405518},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.495489239692688},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.46775341033935547},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4514082074165344},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43117833137512207},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4139534533023834}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7353182435035706},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.666861891746521},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.6530054807662964},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5168624520301819},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.5027248859405518},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.495489239692688},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46775341033935547},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4514082074165344},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43117833137512207},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4139534533023834},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tse.2019.2929761","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2019.2929761","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-5497","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/4494","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/TSE.2019.2929761","raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-5497","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/4494","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/TSE.2019.2929761","raw_type":"Journal Article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7128210980","display_name":null,"funder_award_id":"2018YFB1003904","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":96,"referenced_works":["https://openalex.org/W85350352","https://openalex.org/W273955616","https://openalex.org/W1555168845","https://openalex.org/W1570437003","https://openalex.org/W1655956671","https://openalex.org/W1902482618","https://openalex.org/W1968745662","https://openalex.org/W1972978214","https://openalex.org/W1978859404","https://openalex.org/W1987843766","https://openalex.org/W1987855178","https://openalex.org/W1989354793","https://openalex.org/W1994248747","https://openalex.org/W1994493193","https://openalex.org/W1995945562","https://openalex.org/W2000679946","https://openalex.org/W2006407062","https://openalex.org/W2007705030","https://openalex.org/W2010398592","https://openalex.org/W2019348938","https://openalex.org/W2027483357","https://openalex.org/W2050496630","https://openalex.org/W2067490448","https://openalex.org/W2073649165","https://openalex.org/W2074805796","https://openalex.org/W2093897789","https://openalex.org/W2096451472","https://openalex.org/W2099593355","https://openalex.org/W2100310618","https://openalex.org/W2104329051","https://openalex.org/W2105672266","https://openalex.org/W2105776892","https://openalex.org/W2107277166","https://openalex.org/W2110229593","https://openalex.org/W2110653426","https://openalex.org/W2115105080","https://openalex.org/W2115709314","https://openalex.org/W2118949539","https://openalex.org/W2120703352","https://openalex.org/W2126166995","https://openalex.org/W2129164226","https://openalex.org/W2132887549","https://openalex.org/W2135268264","https://openalex.org/W2140785063","https://openalex.org/W2143637886","https://openalex.org/W2146335723","https://openalex.org/W2147386665","https://openalex.org/W2149783794","https://openalex.org/W2150874999","https://openalex.org/W2151666086","https://openalex.org/W2153887189","https://openalex.org/W2157353183","https://openalex.org/W2158744032","https://openalex.org/W2164641534","https://openalex.org/W2172232422","https://openalex.org/W2276400542","https://openalex.org/W2312398278","https://openalex.org/W2330210193","https://openalex.org/W2408181256","https://openalex.org/W2474835145","https://openalex.org/W2530824252","https://openalex.org/W2534933448","https://openalex.org/W2548915941","https://openalex.org/W2594132308","https://openalex.org/W2599212561","https://openalex.org/W2604794021","https://openalex.org/W2605547445","https://openalex.org/W2606150376","https://openalex.org/W2729440153","https://openalex.org/W2767894374","https://openalex.org/W2787379525","https://openalex.org/W2796283679","https://openalex.org/W2805001156","https://openalex.org/W2808113972","https://openalex.org/W2887004133","https://openalex.org/W2911964244","https://openalex.org/W2963520355","https://openalex.org/W2963548617","https://openalex.org/W3105203384","https://openalex.org/W3175417087","https://openalex.org/W4213251304","https://openalex.org/W4236586490","https://openalex.org/W4237979974","https://openalex.org/W4243379839","https://openalex.org/W4252684946","https://openalex.org/W4399568670","https://openalex.org/W4399637125","https://openalex.org/W4399649413","https://openalex.org/W6603460400","https://openalex.org/W6610017368","https://openalex.org/W6667573648","https://openalex.org/W6677998073","https://openalex.org/W6684088068","https://openalex.org/W6869267393","https://openalex.org/W6869544649","https://openalex.org/W6869665370"],"related_works":["https://openalex.org/W4361804730","https://openalex.org/W2142113611","https://openalex.org/W2334467465","https://openalex.org/W2087870008","https://openalex.org/W2162534555","https://openalex.org/W2752178021","https://openalex.org/W2107419853","https://openalex.org/W2143024819","https://openalex.org/W4247159817","https://openalex.org/W2964201926"],"abstract_inverted_index":{"Just-in-Time":[0],"(JIT)":[1],"defect":[2,18,59,90],"prediction-a":[3],"technique":[4],"which":[5],"aims":[6],"to":[7,24,55,133,243,285,302,341,358],"predict":[8],"bugs":[9],"at":[10],"change":[11],"level-has":[12],"been":[13],"paid":[14],"more":[15,290,304,316,356],"attention.":[16],"JIT":[17,58,89],"prediction":[19,60,66,91,113,150,163],"leverages":[20],"the":[21,32,65,73,76,84,112,116,125,140,148,156,162,167,173,179,185,189,193,199,233,250,278,296,323,342,349,362,371],"SZZ":[22,35,47,81,96,122,127,196],"approach":[23],"identify":[25],"bug-introducing":[26,329],"changes.":[27],"Recently,":[28],"researchers":[29],"found":[30],"that":[31,52,104,221,322],"performance":[33,85,180,226,247,259],"of":[34,45,75,88,147,184,210,224,265,333],"(including":[36],"its":[37],"variants)":[38],"is":[39,130,339,368],"impacted":[40,360],"by":[41,79,107,119,143,170,192,236,253,274,299,310,346,361],"a":[42,57,203,208,245,256,337],"large":[43],"amount":[44],"noise.":[46],"may":[48],"considerably":[49,315],"mislabel":[50],"changes":[51,78,212,235,252,279,298,309,330],"are":[53,105,152,240,355],"used":[54,369],"train":[56],"model,":[61,175],"and":[62,86,102,137,176,181,231,238,282,287,351],"thus":[63],"impact":[64,74],"accuracy.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71,138,177,219],"investigate":[72],"mislabeled":[77,135,234,251,280,297,308],"different":[80],"variants":[82,97,197],"on":[83,155,166,207],"interpretation":[87],"models.":[92],"We":[93,110,160,319],"analyze":[94],"four":[95,121,126,149],"(i.e.,":[98,331],"B-SZZ,":[99],"AG-SZZ,":[100],"MA-SZZ,":[101],"RA-SZZ)":[103],"proposed":[106],"prior":[108],"studies.":[109],"build":[111],"models":[114,151,186],"using":[115,144,188],"labeled":[117,168,190],"data":[118,169,191],"these":[120],"variants.":[123],"Among":[124],"variants,":[128],"RA-SZZ":[129,171],"least":[131],"likely":[132,242,357],"generate":[134],"changes,":[136],"construct":[139],"testing":[141,158],"set":[142],"RA-SZZ.":[145],"All":[146],"then":[153],"evaluated":[154],"same":[157],"set.":[159],"choose":[161],"model":[164],"built":[165],"as":[172,370],"baseline":[174,200],"compare":[178],"metric":[182,326],"importance":[183],"trained":[187],"other":[194],"three":[195],"with":[198,261],"model.":[201],"Through":[202],"large-scale":[204],"empirical":[205],"study":[206],"total":[209],"126,526":[211],"from":[213],"ten":[214],"Apache":[215],"open":[216],"source":[217],"projects,":[218],"find":[220,321],"in":[222,276,336],"terms":[223],"various":[225],"measures":[227],"(AUC,":[228],"F1-score,":[229],"G-mean":[230],"Recall@20%),":[232],"B-SZZ":[237,281,300],"MA-SZZ":[239,311],"not":[241,313],"cause":[244,255,314],"considerable":[246],"reduction,":[248],"while":[249],"AG-SZZ":[254,283],"statistically":[257],"significant":[258],"reduction":[260],"an":[262],"average":[263],"difference":[264],"1-5":[266],"percent.":[267],"When":[268],"considering":[269],"developers'":[270],"inspection":[271,292],"effort":[272],"(measured":[273],"LOC)":[275],"practice,":[277],"lead":[284,301],"9-10":[286],"1-15":[288],"percent":[289],"wasted":[291,305,317],"effort,":[293],"respectively.":[294],"And":[295],"significantly":[303],"effort.":[306,318],"The":[307],"do":[312],"also":[320],"top-most":[324],"important":[325,353],"for":[327],"identifying":[328],"number":[332],"files":[334],"modified":[335],"change)":[338],"robust":[340],"mislabeling":[343,363],"noise":[344],"generated":[345],"SZZ.":[347],"But":[348],"second-":[350],"third-most":[352],"metrics":[354],"be":[359],"noise,":[364],"unless":[365],"random":[366],"forest":[367],"underlying":[372],"classifier.":[373]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":18},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":21},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
