{"id":"https://openalex.org/W4388926368","doi":"https://doi.org/10.48550/arxiv.2311.12019","title":"An Empirical Study of Self-Admitted Technical Debt in Machine Learning Software","display_name":"An Empirical Study of Self-Admitted Technical Debt in Machine Learning Software","publication_year":2023,"publication_date":"2023-11-20","ids":{"openalex":"https://openalex.org/W4388926368","doi":"https://doi.org/10.48550/arxiv.2311.12019"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2311.12019","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.12019","pdf_url":"https://arxiv.org/pdf/2311.12019","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2311.12019","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102778940","display_name":"Aaditya Bhatia","orcid":"https://orcid.org/0000-0002-3552-9460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhatia, Aaditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071052367","display_name":"Foutse Khomh","orcid":"https://orcid.org/0000-0002-5704-4173"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khomh, Foutse","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058164381","display_name":"Bram Adams","orcid":"https://orcid.org/0000-0001-7213-4006"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adams, Bram","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5091586373","display_name":"Ahmed E. Hassan","orcid":"https://orcid.org/0000-0001-7749-5513"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hassan, Ahmed E","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9674999713897705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.945900022983551,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/technical-debt","display_name":"Technical debt","score":0.8647644519805908},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6823471784591675},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5247840285301208},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5212215185165405},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.5144531726837158},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.50095534324646},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.49092036485671997},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.4824584126472473},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.45495525002479553},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45031052827835083},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4438214600086212},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.430887371301651},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4291471242904663},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4081673324108124},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2011747658252716},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12266823649406433}],"concepts":[{"id":"https://openalex.org/C159198006","wikidata":"https://www.wikidata.org/wiki/Q1532172","display_name":"Technical debt","level":4,"score":0.8647644519805908},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6823471784591675},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5247840285301208},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5212215185165405},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.5144531726837158},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.50095534324646},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.49092036485671997},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.4824584126472473},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.45495525002479553},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45031052827835083},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4438214600086212},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.430887371301651},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4291471242904663},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4081673324108124},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2011747658252716},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12266823649406433},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2311.12019","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.12019","pdf_url":"https://arxiv.org/pdf/2311.12019","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2311.12019","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2311.12019","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2311.12019","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2311.12019","pdf_url":"https://arxiv.org/pdf/2311.12019","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"display_name":"Partnerships for the goals","id":"https://metadata.un.org/sdg/17","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1840755814","https://openalex.org/W2060038150","https://openalex.org/W2031271477","https://openalex.org/W4249277012","https://openalex.org/W119580281","https://openalex.org/W4380568682","https://openalex.org/W4387158598","https://openalex.org/W2212955619","https://openalex.org/W3124296310","https://openalex.org/W4387411615"],"abstract_inverted_index":{"The":[0],"emergence":[1],"of":[2,45,61,140,149,161,168,181,189],"open-source":[3,113],"ML":[4,12,19,28,108,114,154,196,221],"libraries":[5],"such":[6,169],"as":[7,96],"TensorFlow":[8],"and":[9,38,71,156,202,214],"Google":[10],"Auto":[11],"has":[13],"enabled":[14],"developers":[15,32],"to":[16,42,104,145,164,209,229],"harness":[17],"state-of-the-art":[18],"algorithms":[20],"with":[21,120],"minimal":[22],"overhead.":[23],"However,":[24],"during":[25,77,238],"this":[26],"accelerated":[27],"development":[29,72,226],"process,":[30],"said":[31],"may":[33],"often":[34,83],"make":[35],"sub-optimal":[36,69],"design":[37,70],"implementation":[39],"decisions,":[40],"leading":[41],"the":[43,59,62,92,132,141,147,153,162,166,186,225],"introduction":[44],"technical":[46,98,150],"debt":[47,99,151,210],"that,":[48],"if":[49],"not":[50],"addressed":[51],"promptly,":[52],"can":[53],"have":[54,177],"a":[55,137,158,178],"significant":[56],"impact":[57],"on":[58],"quality":[60],"ML-based":[63],"software.":[64],"Developers":[65],"frequently":[66],"acknowledge":[67],"these":[68],"choices":[73],"through":[74],"code":[75,109,129,240],"comments":[76,130],"software":[78],"development.":[79],"These":[80],"comments,":[81],"which":[82],"highlight":[84],"areas":[85],"requiring":[86],"additional":[87],"work":[88],"or":[89],"refinement":[90],"in":[91,107,127,152,191,220,224],"future,":[93],"are":[94,206,235],"known":[95],"self-admitted":[97],"(SATD).":[100],"This":[101],"paper":[102],"aims":[103],"investigate":[105],"SATD":[106,126,143,163,182,190],"by":[110],"analyzing":[111],"318":[112,121],"projects":[115,176,222],"across":[116],"five":[117],"domains,":[118],"along":[119],"non-ML":[122,230],"projects.":[123,194,231],"We":[124,171],"detected":[125],"source":[128],"throughout":[131],"different":[133],"project":[134],"snapshots,":[135],"conducted":[136],"manual":[138],"analysis":[139,160],"identified":[142],"sample":[144],"comprehend":[146],"nature":[148],"code,":[155],"performed":[157],"survival":[159],"understand":[165],"evolution":[167],"debts.":[170],"observed:":[172],"i)":[173],"Machine":[174],"learning":[175,193],"median":[179,187],"percentage":[180,188],"that":[183,242],"is":[184],"twice":[185],"non-machine":[192],"ii)":[195],"pipeline":[197],"components":[198],"for":[199],"data":[200],"preprocessing":[201],"model":[203,212],"generation":[204],"logic":[205],"more":[207],"susceptible":[208],"than":[211],"validation":[213],"deployment":[215],"components.":[216],"iii)":[217],"SATDs":[218,234],"appear":[219],"earlier":[223],"process":[227],"compared":[228],"iv)":[232],"Long-lasting":[233],"typically":[236],"introduced":[237],"extensive":[239],"changes":[241],"span":[243],"multiple":[244],"files":[245],"exhibiting":[246],"low":[247],"complexity.":[248]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2023-11-23T00:00:00"}
