{"id":"https://openalex.org/W4417132523","doi":"https://doi.org/10.1109/saner-c66551.2025.00015","title":"Assessing Data Augmentation-Induced Bias in Training and Testing of Machine Learning Models","display_name":"Assessing Data Augmentation-Induced Bias in Training and Testing of Machine Learning Models","publication_year":2025,"publication_date":"2025-03-04","ids":{"openalex":"https://openalex.org/W4417132523","doi":"https://doi.org/10.1109/saner-c66551.2025.00015"},"language":null,"primary_location":{"id":"doi:10.1109/saner-c66551.2025.00015","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner-c66551.2025.00015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Software Analysis, Evolution and Reengineering - Companion (SANER-C)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115022590","display_name":"Riddhi More","orcid":null},"institutions":[{"id":"https://openalex.org/I39470171","display_name":"University of Ontario Institute of Technology","ror":"https://ror.org/016zre027","country_code":"CA","type":"education","lineage":["https://openalex.org/I39470171"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Riddhi More","raw_affiliation_strings":["Ontario Tech University,Oshawa,ON,Canada"],"affiliations":[{"raw_affiliation_string":"Ontario Tech University,Oshawa,ON,Canada","institution_ids":["https://openalex.org/I39470171"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001072274","display_name":"Jeremy S. Bradbury","orcid":"https://orcid.org/0000-0002-5204-908X"},"institutions":[{"id":"https://openalex.org/I39470171","display_name":"University of Ontario Institute of Technology","ror":"https://ror.org/016zre027","country_code":"CA","type":"education","lineage":["https://openalex.org/I39470171"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jeremy S. Bradbury","raw_affiliation_strings":["Ontario Tech University,Oshawa,ON,Canada"],"affiliations":[{"raw_affiliation_string":"Ontario Tech University,Oshawa,ON,Canada","institution_ids":["https://openalex.org/I39470171"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5115022590"],"corresponding_institution_ids":["https://openalex.org/I39470171"],"apc_list":null,"apc_paid":null,"fwci":4.3978,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.948441,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"57","last_page":"60"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.8578000068664551,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.8578000068664551,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.0430000014603138,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.022099999710917473,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/debugging","display_name":"Debugging","score":0.7210999727249146},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5293999910354614},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5200999975204468},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.4887000024318695},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.4560999870300293},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4214000105857849},{"id":"https://openalex.org/keywords/non-regression-testing","display_name":"Non-regression testing","score":0.3765000104904175},{"id":"https://openalex.org/keywords/keyword-driven-testing","display_name":"Keyword-driven testing","score":0.37380000948905945},{"id":"https://openalex.org/keywords/software-testing","display_name":"Software testing","score":0.3328000009059906}],"concepts":[{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.7210999727249146},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7145000100135803},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7077000141143799},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5724999904632568},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5293999910354614},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5200999975204468},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.4887000024318695},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4560999870300293},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.43709999322891235},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4214000105857849},{"id":"https://openalex.org/C86469151","wikidata":"https://www.wikidata.org/wiki/Q917415","display_name":"Non-regression testing","level":5,"score":0.3765000104904175},{"id":"https://openalex.org/C169168650","wikidata":"https://www.wikidata.org/wiki/Q1675637","display_name":"Keyword-driven testing","level":5,"score":0.37380000948905945},{"id":"https://openalex.org/C2984328558","wikidata":"https://www.wikidata.org/wiki/Q188522","display_name":"Software testing","level":3,"score":0.3328000009059906},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C1009929","wikidata":"https://www.wikidata.org/wiki/Q179550","display_name":"Software bug","level":3,"score":0.32679998874664307},{"id":"https://openalex.org/C34585555","wikidata":"https://www.wikidata.org/wiki/Q1368723","display_name":"Learning curve","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C10272871","wikidata":"https://www.wikidata.org/wiki/Q929972","display_name":"Software inspection","level":5,"score":0.31869998574256897},{"id":"https://openalex.org/C188598960","wikidata":"https://www.wikidata.org/wiki/Q7705805","display_name":"Test strategy","level":3,"score":0.28130000829696655},{"id":"https://openalex.org/C182122060","wikidata":"https://www.wikidata.org/wiki/Q6752328","display_name":"Manual testing","level":5,"score":0.27790001034736633},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.2777000069618225},{"id":"https://openalex.org/C162443782","wikidata":"https://www.wikidata.org/wiki/Q1066228","display_name":"White-box testing","level":5,"score":0.2696000039577484},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C111065885","wikidata":"https://www.wikidata.org/wiki/Q1189053","display_name":"Fuzz testing","level":3,"score":0.2667999863624573},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.26030001044273376},{"id":"https://openalex.org/C139143892","wikidata":"https://www.wikidata.org/wiki/Q7441615","display_name":"Search-based software engineering","level":5,"score":0.2596000134944916},{"id":"https://openalex.org/C106159264","wikidata":"https://www.wikidata.org/wiki/Q17146789","display_name":"Random testing","level":4,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/saner-c66551.2025.00015","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner-c66551.2025.00015","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Software Analysis, Evolution and Reengineering - Companion (SANER-C)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2148143831","https://openalex.org/W2957454295","https://openalex.org/W3012420557","https://openalex.org/W3098605233","https://openalex.org/W4384009631","https://openalex.org/W4402670893","https://openalex.org/W4403413408","https://openalex.org/W4410536892"],"related_works":[],"abstract_inverted_index":{"Data":[0],"augmentation":[1,39],"has":[2],"become":[3],"a":[4,49,87],"standard":[5],"practice":[6],"in":[7,18,43,69,79,83,112],"software":[8,44],"engineering":[9],"to":[10,66,98],"address":[11],"limited":[12],"or":[13],"imbalanced":[14],"data":[15,28,56,73],"sets,":[16],"particularly":[17],"specialized":[19],"domains":[20],"like":[21],"test":[22,93,99],"classification":[23],"and":[24,37,46,102],"bug":[25],"detection":[26],"where":[27,71],"can":[29,115],"be":[30],"scarce.":[31],"Although":[32],"techniques":[33],"such":[34],"as":[35],"SMOTE":[36],"mutation-based":[38],"are":[40,75],"widely":[41],"used":[42,76],"testing":[45,84,113],"debugging":[47],"applications,":[48],"rigorous":[50],"understanding":[51],"of":[52,91,109],"how":[53,97],"augmented":[54,72,110],"training":[55,80],"impacts":[57],"model":[58,118],"bias":[59,68,101],"is":[60,63],"lacking.":[61],"It":[62],"especially":[64],"critical":[65],"consider":[67],"scenarios":[70],"sets":[74,114],"not":[77],"just":[78],"but":[81],"also":[82],"models.":[85],"Through":[86],"comprehensive":[88],"case":[89],"study":[90],"flaky":[92],"classification,":[94],"we":[95],"demonstrate":[96],"for":[100],"understand":[103],"the":[104,107],"impact":[105],"that":[106],"inclusion":[108],"samples":[111],"have":[116],"on":[117],"evaluation.":[119]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-11T08:14:18.477133","created_date":"2025-12-08T00:00:00"}
