{"id":"https://openalex.org/W4385488629","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191386","title":"An Empirical Study on Data Balancing in Machine Learning Based Software Traceability Methods","display_name":"An Empirical Study on Data Balancing in Machine Learning Based Software Traceability Methods","publication_year":2023,"publication_date":"2023-06-18","ids":{"openalex":"https://openalex.org/W4385488629","doi":"https://doi.org/10.1109/ijcnn54540.2023.10191386"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn54540.2023.10191386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191386","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024168142","display_name":"Bangchao Wang","orcid":"https://orcid.org/0000-0001-6920-1810"},"institutions":[{"id":"https://openalex.org/I4210119942","display_name":"Wuhan Textile University","ror":"https://ror.org/02jgsf398","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210119942"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bangchao Wang","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","Engineering Research Center of Hubei Province for Clothing Information, Wuhan Textile University, Wuhan, China","School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","institution_ids":["https://openalex.org/I4210119942"]},{"raw_affiliation_string":"Engineering Research Center of Hubei Province for Clothing Information, Wuhan Textile University, Wuhan, China","institution_ids":["https://openalex.org/I4210119942"]},{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China","institution_ids":["https://openalex.org/I4210119942"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100380108","display_name":"Zihan Wang","orcid":"https://orcid.org/0000-0003-1056-6326"},"institutions":[{"id":"https://openalex.org/I4210119942","display_name":"Wuhan Textile University","ror":"https://ror.org/02jgsf398","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210119942"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zihan Wang","raw_affiliation_strings":["School of Mathematical &#x0026; Physical Science, Wuhan Textile University,Wuhan,China"],"affiliations":[{"raw_affiliation_string":"School of Mathematical &#x0026; Physical Science, Wuhan Textile University,Wuhan,China","institution_ids":["https://openalex.org/I4210119942"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018911181","display_name":"Hongyan Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210119942","display_name":"Wuhan Textile University","ror":"https://ror.org/02jgsf398","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210119942"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyan Wan","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China","Engineering Research Center of Hubei Province for Clothing Information, Wuhan Textile University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","institution_ids":["https://openalex.org/I4210119942"]},{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China","institution_ids":["https://openalex.org/I4210119942"]},{"raw_affiliation_string":"Engineering Research Center of Hubei Province for Clothing Information, Wuhan Textile University, Wuhan, China","institution_ids":["https://openalex.org/I4210119942"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101582959","display_name":"Xingfu Li","orcid":"https://orcid.org/0000-0001-9606-7573"},"institutions":[{"id":"https://openalex.org/I4210119942","display_name":"Wuhan Textile University","ror":"https://ror.org/02jgsf398","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210119942"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingfu Li","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","institution_ids":["https://openalex.org/I4210119942"]},{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China","institution_ids":["https://openalex.org/I4210119942"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049366790","display_name":"Yang Deng","orcid":"https://orcid.org/0000-0002-2795-6796"},"institutions":[{"id":"https://openalex.org/I4210119942","display_name":"Wuhan Textile University","ror":"https://ror.org/02jgsf398","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210119942"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Deng","raw_affiliation_strings":["School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University,Wuhan,China","institution_ids":["https://openalex.org/I4210119942"]},{"raw_affiliation_string":"School of Computer Science and Artificial Intelligence, Wuhan Textile University, Wuhan, China","institution_ids":["https://openalex.org/I4210119942"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5024168142"],"corresponding_institution_ids":["https://openalex.org/I4210119942"],"apc_list":null,"apc_paid":null,"fwci":0.4589,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67388007,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7609255313873291},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.726300835609436},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6197717785835266},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.5844613313674927},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5465145707130432},{"id":"https://openalex.org/keywords/traceability","display_name":"Traceability","score":0.5234239101409912},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.49698641896247864},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4366220533847809},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.42404627799987793},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.41529756784439087},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09148508310317993},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09143415093421936}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7609255313873291},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.726300835609436},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6197717785835266},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.5844613313674927},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5465145707130432},{"id":"https://openalex.org/C153876917","wikidata":"https://www.wikidata.org/wiki/Q899704","display_name":"Traceability","level":2,"score":0.5234239101409912},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.49698641896247864},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4366220533847809},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.42404627799987793},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.41529756784439087},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09148508310317993},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09143415093421936},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn54540.2023.10191386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn54540.2023.10191386","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1078058450","display_name":null,"funder_award_id":"62102291","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W115231168","https://openalex.org/W1576442155","https://openalex.org/W1591261915","https://openalex.org/W1993220166","https://openalex.org/W2005350125","https://openalex.org/W2117190680","https://openalex.org/W2118202700","https://openalex.org/W2132604263","https://openalex.org/W2148143831","https://openalex.org/W2338318698","https://openalex.org/W2532915684","https://openalex.org/W2540556128","https://openalex.org/W2553146174","https://openalex.org/W2616891418","https://openalex.org/W2620860873","https://openalex.org/W2741081919","https://openalex.org/W2767634764","https://openalex.org/W2793857627","https://openalex.org/W2802474178","https://openalex.org/W2885967628","https://openalex.org/W2887628184","https://openalex.org/W2901833024","https://openalex.org/W2963038309","https://openalex.org/W2964064835","https://openalex.org/W2992455574","https://openalex.org/W2993655765","https://openalex.org/W3000094227","https://openalex.org/W3038727996","https://openalex.org/W3086180509","https://openalex.org/W3094370653","https://openalex.org/W3112339193","https://openalex.org/W3155649056","https://openalex.org/W3157576672","https://openalex.org/W3162044134","https://openalex.org/W3210252880","https://openalex.org/W4294658788","https://openalex.org/W6634357899"],"related_works":["https://openalex.org/W4396689146","https://openalex.org/W4200112873","https://openalex.org/W2955796858","https://openalex.org/W4224941037","https://openalex.org/W2004826645","https://openalex.org/W4366990902","https://openalex.org/W4317732970","https://openalex.org/W4388550696","https://openalex.org/W4224922629","https://openalex.org/W4321636153"],"abstract_inverted_index":{"Machine":[0,178],"learning":[1,118],"(ML)":[2],"has":[3,55,138],"been":[4],"widely":[5],"used":[6],"in":[7,210],"trace":[8,19],"link":[9],"recovery":[10],"(TLR)":[11],"to":[12,49,75,187],"reduce":[13],"the":[14,24,35,77,80,95,113,116,129,146,149,166,213,218],"manual":[15],"maintenance":[16],"cost":[17],"of":[18,27,37,53,61,72,79,82,97,115,133,148,168,182,203,212,215,220],"links":[20,29,32],"by":[21,153],"developers.":[22],"However,":[23],"imbalanced":[25],"distribution":[26],"valid":[28,221],"and":[30,87,101,106,217],"invalid":[31],"seriously":[33],"affects":[34],"performance":[36,96,114],"classifiers.":[38],"Although":[39],"a":[40,139],"few":[41],"studies":[42],"have":[43],"applied":[44],"data":[45],"balancing":[46],"techniques":[47],"(DBT)":[48],"ML-based":[50,99,103,143,204],"TLR,":[51],"none":[52],"them":[54],"systematically":[56],"analyzed":[57],"more":[58],"effective":[59],"combinations":[60],"them.":[62],"Therefore,":[63],"we":[64,111],"perform":[65],"an":[66],"empirical":[67],"study":[68],"on":[69,90,123,128,142,160,197],"three":[70],"groups":[71],"control":[73],"experiments":[74],"explore":[76],"impact":[78],"combination":[81],"different":[83],"ML":[84],"methods":[85],"with":[86,105,121,157,206],"without":[88,107],"DBT":[89,108,122,137,207],"TLR":[91,100,104,205],"efficiency.":[92],"We":[93],"compare":[94],"supervised":[98],"unsupervised":[102],"respectively.":[109],"Then,":[110],"analyze":[112],"ensemble":[117],"model":[119,151],"(EM)":[120],"TLR.":[124,144],"The":[125,180],"experimental":[126],"results":[127],"7":[130],"imbalance":[131],"datasets":[132,216],"CoEST":[134],"indicate":[135],"that":[136],"positive":[140],"effect":[141],"Specifically,":[145],"recall":[147],"LR":[150,183],"increased":[152,184],"0.5517":[154],"after":[155],"combining":[156],"most":[158],"DBTs":[159],"EasyClinic(ID-TC),":[161],"while":[162],"Tomek-link":[163],"significantly":[164],"improves":[165],"precision":[167,181],"K-Nearest":[169],"Neighbor":[170],"(KNN),":[171],"Decision":[172],"Tree":[173],"(DT),":[174],"LR,":[175],"Support":[176],"Vector":[177],"(SVM).":[179],"from":[185],"0.5036":[186],"1.0.":[188],"BalanceRF":[189],"is":[190],"best":[191],"at":[192],"increasing":[193],"recall,":[194],"reaching":[195],"1.0":[196],"4":[198],"datasets.":[199],"Moreover,the":[200],"improvement":[201],"degree":[202],"shows":[208],"differences":[209],"terms":[211],"size":[214],"proportion":[219],"links.":[222]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
