{"id":"https://openalex.org/W2968356825","doi":"https://doi.org/10.1145/3340482.3342743","title":"Risk-based data validation in machine learning-based software systems","display_name":"Risk-based data validation in machine learning-based software systems","publication_year":2019,"publication_date":"2019-08-08","ids":{"openalex":"https://openalex.org/W2968356825","doi":"https://doi.org/10.1145/3340482.3342743","mag":"2968356825"},"language":"en","primary_location":{"id":"doi:10.1145/3340482.3342743","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3340482.3342743","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd ACM SIGSOFT International Workshop on Machine Learning Techniques for Software Quality Evaluation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033698477","display_name":"Harald Foidl","orcid":"https://orcid.org/0000-0002-6283-0419"},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Harald Foidl","raw_affiliation_strings":["University of Innsbruck, Austria"],"affiliations":[{"raw_affiliation_string":"University of Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055476405","display_name":"Michael Felderer","orcid":"https://orcid.org/0000-0003-3818-4442"},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Michael Felderer","raw_affiliation_strings":["University of Innsbruck, Austria"],"affiliations":[{"raw_affiliation_string":"University of Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033698477"],"corresponding_institution_ids":["https://openalex.org/I190249584"],"apc_list":null,"apc_paid":null,"fwci":3.565,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.92942573,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"13","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9801999926567078,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7668778896331787},{"id":"https://openalex.org/keywords/software-quality","display_name":"Software quality","score":0.6209134459495544},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.5835646986961365},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.564153790473938},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5384758114814758},{"id":"https://openalex.org/keywords/data-validation","display_name":"Data validation","score":0.51031893491745},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.5076702833175659},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4724186956882477},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4565543532371521},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.43975886702537537},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.41040316224098206},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3888079524040222},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.2936890721321106},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.16949111223220825},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13438284397125244},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.09983822703361511}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7668778896331787},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.6209134459495544},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.5835646986961365},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.564153790473938},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5384758114814758},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.51031893491745},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.5076702833175659},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4724186956882477},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4565543532371521},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.43975886702537537},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.41040316224098206},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3888079524040222},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.2936890721321106},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.16949111223220825},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13438284397125244},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.09983822703361511},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3340482.3342743","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3340482.3342743","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd ACM SIGSOFT International Workshop on Machine Learning Techniques for Software Quality Evaluation","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W171594045","https://openalex.org/W266253324","https://openalex.org/W935072310","https://openalex.org/W1503398984","https://openalex.org/W1547145819","https://openalex.org/W1557978960","https://openalex.org/W1610496399","https://openalex.org/W1973389480","https://openalex.org/W2004291985","https://openalex.org/W2019880039","https://openalex.org/W2028487813","https://openalex.org/W2074702228","https://openalex.org/W2109574129","https://openalex.org/W2282821441","https://openalex.org/W2552268348","https://openalex.org/W2552408584","https://openalex.org/W2565989828","https://openalex.org/W2613597870","https://openalex.org/W2616028256","https://openalex.org/W2743948853","https://openalex.org/W2782864149","https://openalex.org/W2808642230","https://openalex.org/W2818472515","https://openalex.org/W2889249015","https://openalex.org/W2894409651","https://openalex.org/W2896671475","https://openalex.org/W2899595183","https://openalex.org/W2903495827","https://openalex.org/W2905275479","https://openalex.org/W2913059114","https://openalex.org/W2945883466","https://openalex.org/W2951501516","https://openalex.org/W2990673659","https://openalex.org/W2991581298","https://openalex.org/W3013869798","https://openalex.org/W3013920665","https://openalex.org/W3015160377","https://openalex.org/W4285719527","https://openalex.org/W4318211321","https://openalex.org/W6636177537"],"related_works":["https://openalex.org/W2976128099","https://openalex.org/W4205770829","https://openalex.org/W3081133439","https://openalex.org/W2334967743","https://openalex.org/W2189292564","https://openalex.org/W2040924833","https://openalex.org/W4386246791","https://openalex.org/W2996210062","https://openalex.org/W4379932535","https://openalex.org/W4281632015"],"abstract_inverted_index":{"Data":[0,137,139],"validation":[1,20,63,72,177,189],"is":[2,34,92,100,167],"an":[3,18],"essential":[4],"requirement":[5],"to":[6,25,30,57,126,156],"ensure":[7],"the":[8,95,106,115,118,128,144,151,157,160,185,192],"reliability":[9],"and":[10,105,179,201],"quality":[11,91,104,112,133,149],"of":[12,21,50,82,88,101,108,117,130,146,153,159,187,194],"Machine":[13],"Learning-based":[14],"Software":[15],"Systems.":[16],"However,":[17],"exhaustive":[19],"all":[22],"data":[23,62,71,84,90,103,132,176,188],"fed":[24],"these":[26],"systems":[27,52],"(i.e.":[28,61,164,175],"up":[29],"several":[31],"thousand":[32],"features)":[33],"practically":[35],"unfeasible.":[36],"In":[37],"addition,":[38],"there":[39],"has":[40],"been":[41],"little":[42],"discussion":[43],"about":[44],"methods":[45],"that":[46,74,97],"support":[47,174],"software":[48,182,203],"engineers":[49,183],"such":[51],"in":[53,191],"determining":[54],"how":[55],"thorough":[56],"validate":[58],"each":[59],"feature":[60,99,113],"rigor).":[64],"Therefore,":[65],"this":[66,109],"paper":[67],"presents":[68],"a":[69,98,196],"conceptual":[70],"approach":[73,171],"prioritizes":[75],"features":[76,154],"based":[77],"on":[78,114],"their":[79],"estimated":[80],"risk":[81,87],"poor":[83,89],"quality.":[85],"The":[86,169],"determined":[93],"by":[94],"probability":[96,129],"low":[102,110,131,147],"impact":[107,145],"(data)":[111,148],"result":[116],"machine":[119,161,198],"learning":[120,162,199],"model.":[121],"Three":[122],"criteria":[123],"are":[124],"presented":[125,170],"estimate":[127],"(Data":[134],"Source":[135],"Quality,":[136],"Smells,":[138],"Pipeline":[140],"Quality).":[141],"To":[142],"determine":[143],"features,":[150],"importance":[152],"according":[155],"performance":[158],"model":[163,200],"Feature":[165],"Importance)":[166],"utilized.":[168],"provides":[172],"decision":[173],"prioritization":[178],"rigor)":[180],"for":[181],"during":[184],"implementation":[186],"techniques":[190],"course":[193],"deploying":[195],"trained":[197],"its":[202],"stack.":[204]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
