{"id":"https://openalex.org/W4310113603","doi":"https://doi.org/10.1145/3568562.3568643","title":"ml-Codesmell: A code smell prediction dataset for machine learning approaches","display_name":"ml-Codesmell: A code smell prediction dataset for machine learning approaches","publication_year":2022,"publication_date":"2022-11-29","ids":{"openalex":"https://openalex.org/W4310113603","doi":"https://doi.org/10.1145/3568562.3568643"},"language":"en","primary_location":{"id":"doi:10.1145/3568562.3568643","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3568562.3568643","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 11th International Symposium on Information and Communication Technology","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002132994","display_name":"Binh Nguyen Thanh","orcid":"https://orcid.org/0000-0002-5650-7738"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Binh Nguyen Thanh","raw_affiliation_strings":["College of Electromechanics, Construction and Agro-forestry of Central Vietnam, Viet Nam","The University of Danang, Vietnam-Korea University of Information and Communication Technology, Viet Nam"],"raw_orcid":"https://orcid.org/0000-0002-5650-7738","affiliations":[{"raw_affiliation_string":"College of Electromechanics, Construction and Agro-forestry of Central Vietnam, Viet Nam","institution_ids":[]},{"raw_affiliation_string":"The University of Danang, Vietnam-Korea University of Information and Communication Technology, Viet Nam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002774844","display_name":"Minh N. H. Nguyen","orcid":"https://orcid.org/0000-0002-3035-0816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minh Nguyen N. H.","raw_affiliation_strings":["The University of Danang, Vietnam-Korea University of Information and Communication Technology, Viet Nam"],"raw_orcid":"https://orcid.org/0000-0002-3035-0816","affiliations":[{"raw_affiliation_string":"The University of Danang, Vietnam-Korea University of Information and Communication Technology, Viet Nam","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050165534","display_name":"Le Thi My Hanh","orcid":null},"institutions":[{"id":"https://openalex.org/I3129492623","display_name":"University of Da Nang","ror":"https://ror.org/03ecpp171","country_code":"VN","type":"education","lineage":["https://openalex.org/I3129492623"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Hanh Le Thi My","raw_affiliation_strings":["The University of Danang, University Of Science and Technology, Viet Nam"],"raw_orcid":"https://orcid.org/0000-0001-6982-7848","affiliations":[{"raw_affiliation_string":"The University of Danang, University Of Science and Technology, Viet Nam","institution_ids":["https://openalex.org/I3129492623"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015841332","display_name":"Nguyen Thanh Binh","orcid":"https://orcid.org/0000-0002-0154-1162"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Binh Nguyen Thanh","raw_affiliation_strings":["College of Electromechanics, Construction and Agro-forestry of Central Vietnam, Viet Nam","The University of Danang, Vietnam-Korea University of Information and Communication Technology, Viet Nam"],"raw_orcid":"https://orcid.org/0000-0002-0154-1162","affiliations":[{"raw_affiliation_string":"College of Electromechanics, Construction and Agro-forestry of Central Vietnam, Viet Nam","institution_ids":[]},{"raw_affiliation_string":"The University of Danang, Vietnam-Korea University of Information and Communication Technology, Viet Nam","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5002132994"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5951,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.87437099,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"368","last_page":"374"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/code-smell","display_name":"Code smell","score":0.852402925491333},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8292170763015747},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.7487698197364807},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.6919902563095093},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6128415465354919},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5989223718643188},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5434584617614746},{"id":"https://openalex.org/keywords/code-review","display_name":"Code review","score":0.4668223261833191},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4092090427875519},{"id":"https://openalex.org/keywords/static-program-analysis","display_name":"Static program analysis","score":0.3809581995010376},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3243900537490845},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.26532095670700073},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.25011131167411804},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.16074493527412415}],"concepts":[{"id":"https://openalex.org/C133237599","wikidata":"https://www.wikidata.org/wiki/Q2295111","display_name":"Code smell","level":5,"score":0.852402925491333},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8292170763015747},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.7487698197364807},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.6919902563095093},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6128415465354919},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5989223718643188},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5434584617614746},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.4668223261833191},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4092090427875519},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.3809581995010376},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3243900537490845},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.26532095670700073},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.25011131167411804},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.16074493527412415},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3568562.3568643","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3568562.3568643","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 11th International Symposium on Information and Communication Technology","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W649920412","https://openalex.org/W1534729839","https://openalex.org/W1580134275","https://openalex.org/W1709483371","https://openalex.org/W1970029789","https://openalex.org/W1986136726","https://openalex.org/W1988997230","https://openalex.org/W1995317049","https://openalex.org/W1997029057","https://openalex.org/W2001730430","https://openalex.org/W2004147962","https://openalex.org/W2033239109","https://openalex.org/W2045749853","https://openalex.org/W2053703112","https://openalex.org/W2073792498","https://openalex.org/W2099535882","https://openalex.org/W2100925270","https://openalex.org/W2103647733","https://openalex.org/W2113322762","https://openalex.org/W2128802947","https://openalex.org/W2139074146","https://openalex.org/W2154196314","https://openalex.org/W2223640994","https://openalex.org/W2239136520","https://openalex.org/W2338541268","https://openalex.org/W2402199355","https://openalex.org/W2565244242","https://openalex.org/W2589114814","https://openalex.org/W2598959717","https://openalex.org/W2742512005","https://openalex.org/W2787379525","https://openalex.org/W2800788706","https://openalex.org/W2900961173","https://openalex.org/W2911964244","https://openalex.org/W2954327103","https://openalex.org/W3015461837","https://openalex.org/W3020936518","https://openalex.org/W3090964982","https://openalex.org/W4245648766","https://openalex.org/W4248575338","https://openalex.org/W4250325350"],"related_works":["https://openalex.org/W3165221216","https://openalex.org/W4382562158","https://openalex.org/W2896744621","https://openalex.org/W3006945192","https://openalex.org/W4384026574","https://openalex.org/W1976804602","https://openalex.org/W2150625980","https://openalex.org/W1982871693","https://openalex.org/W2390554102","https://openalex.org/W2909969119"],"abstract_inverted_index":{"In":[0,74],"recent":[1],"years,":[2],"many":[3,94],"studies":[4,145],"on":[5,115,146],"detecting":[6,55,147],"code":[7,11,23,32,56,70,86,91,96,108,148],"smells":[8],"in":[9,68,120,154],"source":[10,85,90],"have":[12],"published":[13],"datasets":[14,40,48,63],"with":[15,93],"limited":[16],"characteristics,":[17],"such":[18],"as":[19],"the":[20,34,39,44,47,62,79,116,122,136],"ambiguity":[21],"of":[22,36,38,46],"smell":[24,57,71,109,149],"definitions":[25],"leads":[26],"to":[27,104,141],"different":[28],"interpretations":[29],"for":[30,144],"each":[31],"smell,":[33],"number":[35],"samples":[37],"is":[41,59,139],"small,":[42],"and":[43,61,87,106,131],"features":[45,130],"are":[49,64],"heterogeneous.":[50],"Therefore,":[51],"comparing":[52],"performance":[53],"between":[54,129],"models":[58],"challenging,":[60],"often":[65],"not":[66],"reusable":[67],"other":[69],"detection":[72],"studies.":[73],"this":[75],"work,":[76],"we":[77],"propose":[78],"ml-Codesmell":[80,123,137],"dataset":[81,100,124,138],"created":[82],"by":[83],"analyzing":[84],"extracting":[88],"massive":[89],"metrics":[92],"labelled":[95],"smells.":[97],"The":[98],"proposed":[99],"has":[101],"been":[102],"used":[103],"train":[105],"predict":[107],"using":[110,150],"machine":[111,151],"learning":[112,152],"algorithms.":[113],"Based":[114],"high":[117],"confidential":[118],"F1-score":[119],"evaluation,":[121],"demonstrates":[125],"a":[126],"strong":[127],"correlation":[128],"labels.":[132],"Regarding":[133],"these":[134],"advantages,":[135],"expected":[140],"be":[142],"helpful":[143],"approaches":[153],"software":[155],"development.":[156]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
