{"id":"https://openalex.org/W7126054717","doi":"https://doi.org/10.1007/978-3-032-15984-7_12","title":"Mind the\u00a0Gap: Investigating the\u00a0Impact of\u00a0Data Leakage on\u00a0Machine Learning Predictive Models","display_name":"Mind the\u00a0Gap: Investigating the\u00a0Impact of\u00a0Data Leakage on\u00a0Machine Learning Predictive Models","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7126054717","doi":"https://doi.org/10.1007/978-3-032-15984-7_12"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-032-15984-7_12","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-032-15984-7_12","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124234747","display_name":"Augusto Exenberger Becker","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Augusto Exenberger Becker","raw_affiliation_strings":["Institute of Informatics, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124236253","display_name":"Mariana Recamonde-Mendoza","orcid":null},"institutions":[{"id":"https://openalex.org/I130442723","display_name":"Universidade Federal do Rio Grande do Sul","ror":"https://ror.org/041yk2d64","country_code":"BR","type":"education","lineage":["https://openalex.org/I130442723"]},{"id":"https://openalex.org/I4210097146","display_name":"Hospital de Cl\u00ednicas de Porto Alegre","ror":"https://ror.org/010we4y38","country_code":"BR","type":"healthcare","lineage":["https://openalex.org/I4210097146"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Mariana Recamonde-Mendoza","raw_affiliation_strings":["Bioinformatics Core, Hospital de Cl\u00ednicas de Porto Alegre (HCPA), Porto Alegre, RS, Brazil","Institute of Informatics, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, RS, Brazil"],"affiliations":[{"raw_affiliation_string":"Bioinformatics Core, Hospital de Cl\u00ednicas de Porto Alegre (HCPA), Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I4210097146"]},{"raw_affiliation_string":"Institute of Informatics, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, RS, Brazil","institution_ids":["https://openalex.org/I130442723"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5124234747"],"corresponding_institution_ids":["https://openalex.org/I130442723"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.52021256,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"166","last_page":"180"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.3052000105381012,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.3052000105381012,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.23739999532699585,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.07079999893903732,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.6956999897956848},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.5746999979019165},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4672999978065491},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.4244000017642975},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.3961000144481659},{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.391400009393692},{"id":"https://openalex.org/keywords/database-normalization","display_name":"Database normalization","score":0.3847000002861023},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.38420000672340393},{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.37700000405311584}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8079000115394592},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7802000045776367},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7249000072479248},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.6956999897956848},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.5746999979019165},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4672999978065491},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.4244000017642975},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.3961000144481659},{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.391400009393692},{"id":"https://openalex.org/C162984825","wikidata":"https://www.wikidata.org/wiki/Q339072","display_name":"Database normalization","level":3,"score":0.3847000002861023},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.38420000672340393},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3822999894618988},{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.37700000405311584},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3743000030517578},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.34139999747276306},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3314000070095062},{"id":"https://openalex.org/C60777511","wikidata":"https://www.wikidata.org/wiki/Q3045002","display_name":"Concept drift","level":3,"score":0.32359999418258667},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C107321475","wikidata":"https://www.wikidata.org/wiki/Q5374254","display_name":"Empirical risk minimization","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C40423286","wikidata":"https://www.wikidata.org/wiki/Q284172","display_name":"Selection bias","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C133199616","wikidata":"https://www.wikidata.org/wiki/Q25386885","display_name":"Empirical modelling","level":2,"score":0.2563999891281128},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-3-032-15984-7_12","is_oa":false,"landing_page_url":"https://doi.org/10.1007/978-3-032-15984-7_12","pdf_url":null,"source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2736287575","https://openalex.org/W2916678774","https://openalex.org/W3011190810","https://openalex.org/W3134690108","https://openalex.org/W3155717344","https://openalex.org/W3206431085","https://openalex.org/W3214734349","https://openalex.org/W4313563767","https://openalex.org/W4385576721","https://openalex.org/W4392018482","https://openalex.org/W4401455507"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2026-02-01T03:34:12.195049","created_date":"2026-01-30T00:00:00"}
