{"id":"https://openalex.org/W7117302206","doi":"https://doi.org/10.1109/access.2025.3648907","title":"A Unified Multi-Label Code Smell Dataset for Code Smell Detection at Different Granularities","display_name":"A Unified Multi-Label Code Smell Dataset for Code Smell Detection at Different Granularities","publication_year":2025,"publication_date":"2025-12-26","ids":{"openalex":"https://openalex.org/W7117302206","doi":"https://doi.org/10.1109/access.2025.3648907"},"language":null,"primary_location":{"id":"doi:10.1109/access.2025.3648907","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3648907","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3648907","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121330412","display_name":"Haneen Alhadeaf","orcid":null},"institutions":[{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Haneen M. Alhadeaf","raw_affiliation_strings":["Department of Software Engineering, King Saud University, Riyadh, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-6069-7087","affiliations":[{"raw_affiliation_string":"Department of Software Engineering, King Saud University, Riyadh, Saudi Arabia","institution_ids":["https://openalex.org/I28022161"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049726789","display_name":"Mubarak Alrashoud","orcid":"https://orcid.org/0000-0002-5902-7414"},"institutions":[{"id":"https://openalex.org/I28022161","display_name":"King Saud University","ror":"https://ror.org/02f81g417","country_code":"SA","type":"education","lineage":["https://openalex.org/I28022161"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Mubarak Alrashoud","raw_affiliation_strings":["Department of Software Engineering, King Saud University, Riyadh, Saudi Arabia"],"raw_orcid":"https://orcid.org/0000-0002-5902-7414","affiliations":[{"raw_affiliation_string":"Department of Software Engineering, King Saud University, Riyadh, Saudi Arabia","institution_ids":["https://openalex.org/I28022161"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.71140253,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"714","last_page":"737"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9799000024795532,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.0026000000070780516,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hamming-distance","display_name":"Hamming distance","score":0.6538000106811523},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6510999798774719},{"id":"https://openalex.org/keywords/jaccard-index","display_name":"Jaccard index","score":0.6478000283241272},{"id":"https://openalex.org/keywords/code-smell","display_name":"Code smell","score":0.5898000001907349},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.5522000193595886},{"id":"https://openalex.org/keywords/hamming-code","display_name":"Hamming code","score":0.4948999881744385},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.426800012588501},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.41850000619888306}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8021000027656555},{"id":"https://openalex.org/C193319292","wikidata":"https://www.wikidata.org/wiki/Q272172","display_name":"Hamming distance","level":2,"score":0.6538000106811523},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6510999798774719},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.6478000283241272},{"id":"https://openalex.org/C133237599","wikidata":"https://www.wikidata.org/wiki/Q2295111","display_name":"Code smell","level":5,"score":0.5898000001907349},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.5522000193595886},{"id":"https://openalex.org/C73150493","wikidata":"https://www.wikidata.org/wiki/Q853922","display_name":"Hamming code","level":4,"score":0.4948999881744385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4794999957084656},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.426800012588501},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.41850000619888306},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4133000075817108},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.39980000257492065},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3955000042915344},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.37790000438690186},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.32670000195503235},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C63435697","wikidata":"https://www.wikidata.org/wiki/Q864135","display_name":"Binary code","level":3,"score":0.2741999924182892},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.27309998869895935},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C82214349","wikidata":"https://www.wikidata.org/wiki/Q657339","display_name":"Software metric","level":5,"score":0.2612999975681305},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2572000026702881}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/access.2025.3648907","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3648907","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3648907","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3648907","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land","score":0.4062396287918091}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W649920412","https://openalex.org/W1978813754","https://openalex.org/W2001730430","https://openalex.org/W2100925270","https://openalex.org/W2132201370","https://openalex.org/W2133223948","https://openalex.org/W2137556552","https://openalex.org/W2146241755","https://openalex.org/W2159796803","https://openalex.org/W2164566712","https://openalex.org/W2608628736","https://openalex.org/W2742512005","https://openalex.org/W2789476037","https://openalex.org/W2796404405","https://openalex.org/W2801360630","https://openalex.org/W2899956841","https://openalex.org/W2990340259","https://openalex.org/W3014553393","https://openalex.org/W3020936518","https://openalex.org/W3209820304","https://openalex.org/W4252684946","https://openalex.org/W4280612166","https://openalex.org/W4376137025","https://openalex.org/W4376505268","https://openalex.org/W4387721515","https://openalex.org/W4394730962","https://openalex.org/W4399054139","https://openalex.org/W4412363034"],"related_works":[],"abstract_inverted_index":{"Code":[0],"smell":[1,20],"detection":[2,193],"is":[3,145],"critical":[4],"for":[5,118,131,141,219],"maintaining":[6],"software":[7],"quality":[8],"and":[9,58,69,81,104,110,151,181,200,209,215,228],"enabling":[10],"effective":[11],"refactoring,":[12],"yet":[13],"much":[14],"prior":[15],"work":[16],"identifies":[17],"only":[18],"one":[19,198],"at":[21,55],"a":[22,32,46,114,128],"time.":[23],"This":[24],"single-label":[25],"framing":[26],"misses":[27],"the":[28,90,132,155,165,176,182,188],"real-world":[29],"complexity":[30],"where":[31,203,210],"code":[33],"element":[34],"can":[35],"exhibit":[36],"multiple":[37,195],"co-occurring":[38],"smells.":[39],"We":[40,96],"address":[41],"this":[42],"gap":[43],"by":[44],"creating":[45],"unified":[47,133,156,189],"multi-label":[48,99],"dataset":[49,190],"that":[50,92,224],"combines":[51],"four":[52],"existing":[53],"datasets":[54],"two":[56],"levels\u2014method-level":[57],"class-level\u2014covering":[59],"Long":[60],"Method":[61],"(LM),":[62],"Feature":[63,85],"Envy":[64],"(FE),":[65],"God":[66],"Class":[67,71],"(GC),":[68],"Data":[70],"(DC).":[72],"Our":[73],"approach":[74],"models":[75],"correlations":[76,205],"(co-occurrence)":[77],"among":[78],"these":[79],"smells":[80,196],"links":[82],"them":[83],"to":[84,88,230],"Importance":[86],"analyses":[87],"identify":[89],"metrics":[91],"drive":[93],"each":[94,119],"label.":[95],"evaluate":[97],"three":[98],"strategies\u2014Binary":[100],"Relevance,":[101],"Label":[102],"Powerset,":[103],"Classifier":[105,159],"Chains\u2014paired":[106],"with":[107,136,161,172],"Decision":[108],"Tree":[109],"Random":[111,162,173],"Forest.We":[112],"used":[113],"5-fold":[115,129],"stratified":[116],"cross-validation":[117],"single-level":[120],"dataset.":[121],"To":[122],"prevent":[123],"data":[124],"leakage,":[125],"we":[126],"implemented":[127],"GroupKFold":[130],"dataset,":[134,158],"along":[135],"an":[137],"in-fold":[138],"grid":[139],"search":[140],"hyperparameter":[142],"tuning.":[143],"Performance":[144],"reported":[146],"via":[147],"Jaccard,":[148],"Hamming":[149,178],"Loss,":[150],"Exact":[152,184],"Match.":[153],"On":[154],"Truth-only":[157],"Chains":[160],"Forest":[163,174],"achieves":[164],"highest":[166,183],"Jaccard":[167],"(0.3351),":[168],"while":[169],"Binary":[170],"Relevance":[171],"attains":[175],"lowest":[177],"Loss":[179],"(0.0288)":[180],"Match":[185],"(0.8847).":[186],"Overall,":[187],"enables":[191],"simultaneous":[192],"of":[194],"in":[197],"model":[199],"protocol,":[201],"reveals":[202],"inter-label":[204],"are":[206,212,225],"informative":[207],"(LM\u2013FE)":[208],"they":[211],"not":[213],"(GC\u2013DC),":[214],"maintains":[216],"distribution":[217],"properties":[218],"fair":[220],"comparison,":[221],"yielding":[222],"results":[223],"more":[226],"realistic":[227],"easier":[229],"reuse.":[231]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-26T00:00:00"}
