{"id":"https://openalex.org/W4296483261","doi":"https://doi.org/10.1007/s10664-022-10197-4","title":"Learning from what we know: How to perform vulnerability prediction using noisy historical data","display_name":"Learning from what we know: How to perform vulnerability prediction using noisy historical data","publication_year":2022,"publication_date":"2022-09-20","ids":{"openalex":"https://openalex.org/W4296483261","doi":"https://doi.org/10.1007/s10664-022-10197-4"},"language":"en","primary_location":{"id":"doi:10.1007/s10664-022-10197-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-022-10197-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-022-10197-4.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10664-022-10197-4.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079315279","display_name":"Aayush Garg","orcid":"https://orcid.org/0000-0002-2507-8846"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":true,"raw_author_name":"Aayush Garg","raw_affiliation_strings":["University of Luxembourg, Esch-sur-Alzette, Luxembourg"],"affiliations":[{"raw_affiliation_string":"University of Luxembourg, Esch-sur-Alzette, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032855338","display_name":"Renzo Degiovanni","orcid":"https://orcid.org/0000-0003-1611-3969"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Renzo Degiovanni","raw_affiliation_strings":["University of Luxembourg, Esch-sur-Alzette, Luxembourg"],"affiliations":[{"raw_affiliation_string":"University of Luxembourg, Esch-sur-Alzette, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101828371","display_name":"Matthieu Jimenez","orcid":"https://orcid.org/0000-0003-4061-1108"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Matthieu Jimenez","raw_affiliation_strings":["University of Luxembourg, Esch-sur-Alzette, Luxembourg"],"affiliations":[{"raw_affiliation_string":"University of Luxembourg, Esch-sur-Alzette, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000695937","display_name":"Maxime Cordy","orcid":"https://orcid.org/0000-0001-8312-1358"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Maxime Cordy","raw_affiliation_strings":["University of Luxembourg, Esch-sur-Alzette, Luxembourg"],"affiliations":[{"raw_affiliation_string":"University of Luxembourg, Esch-sur-Alzette, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081145634","display_name":"Mike Papadakis","orcid":"https://orcid.org/0000-0003-1852-2547"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Mike Papadakis","raw_affiliation_strings":["University of Luxembourg, Esch-sur-Alzette, Luxembourg"],"affiliations":[{"raw_affiliation_string":"University of Luxembourg, Esch-sur-Alzette, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040574362","display_name":"Yves Le Traon","orcid":"https://orcid.org/0000-0002-1045-4861"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Yves Le Traon","raw_affiliation_strings":["University of Luxembourg, Esch-sur-Alzette, Luxembourg"],"affiliations":[{"raw_affiliation_string":"University of Luxembourg, Esch-sur-Alzette, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5079315279"],"corresponding_institution_ids":["https://openalex.org/I186903577"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":6.6838,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.96882622,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"27","issue":"7","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.76629638671875},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7419809103012085},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6105280518531799},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5139724612236023},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5005016326904297},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.47295352816581726},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.46638157963752747},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.45855090022087097},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.44271743297576904},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.43186822533607483},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.32992154359817505},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.2996236979961395}],"concepts":[{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.76629638671875},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7419809103012085},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6105280518531799},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5139724612236023},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5005016326904297},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.47295352816581726},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.46638157963752747},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.45855090022087097},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.44271743297576904},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.43186822533607483},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.32992154359817505},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2996236979961395},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10664-022-10197-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-022-10197-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-022-10197-4.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},{"id":"pmh:oai:orbilu.uni.lu:10993/45529","is_oa":true,"landing_page_url":"https://github.com/garghub/TROVON","pdf_url":null,"source":{"id":"https://openalex.org/S4306401815","display_name":"Open Repository and Bibliography (University of Luxembourg)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I186903577","host_organization_name":"University of Luxembourg","host_organization_lineage":["https://openalex.org/I186903577"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Empirical Software Engineering (2022-09-20)","raw_type":"peer reviewed"}],"best_oa_location":{"id":"doi:10.1007/s10664-022-10197-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-022-10197-4","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-022-10197-4.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8203274608","display_name":null,"funder_award_id":"INTER/ANR/18/12632675/SATOCROSS","funder_id":"https://openalex.org/F4320321038","funder_display_name":"Fonds National de la Recherche Luxembourg"}],"funders":[{"id":"https://openalex.org/F4320321038","display_name":"Fonds National de la Recherche Luxembourg","ror":"https://ror.org/039z13y21"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4296483261.pdf","grobid_xml":"https://content.openalex.org/works/W4296483261.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1615337185","https://openalex.org/W1655956671","https://openalex.org/W1997236144","https://openalex.org/W1997646511","https://openalex.org/W2004758929","https://openalex.org/W2043837581","https://openalex.org/W2046830558","https://openalex.org/W2064675550","https://openalex.org/W2079753286","https://openalex.org/W2109553965","https://openalex.org/W2118283821","https://openalex.org/W2126513753","https://openalex.org/W2137789775","https://openalex.org/W2146338950","https://openalex.org/W2151666086","https://openalex.org/W2166336492","https://openalex.org/W2360967250","https://openalex.org/W2402619042","https://openalex.org/W2504360466","https://openalex.org/W2511803001","https://openalex.org/W2564850588","https://openalex.org/W2579253943","https://openalex.org/W2591925664","https://openalex.org/W2594990650","https://openalex.org/W2605202003","https://openalex.org/W2781491433","https://openalex.org/W2900690747","https://openalex.org/W2967556797","https://openalex.org/W2970413753","https://openalex.org/W2972082064","https://openalex.org/W2984772254","https://openalex.org/W2993007949","https://openalex.org/W3101228802","https://openalex.org/W4206242116","https://openalex.org/W4252684946","https://openalex.org/W4294607971"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4394895745","https://openalex.org/W4247566972","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W2910064364","https://openalex.org/W2964083560","https://openalex.org/W3117807895"],"abstract_inverted_index":{"Abstract":[0],"Vulnerability":[1,175],"prediction":[2,183,191],"refers":[3],"to":[4,15,40,76,126],"the":[5,41,46,50,54,114,129,137,173,182],"problem":[6,20],"of":[7,49,139,185,215,232],"identifying":[8],"system":[9],"components":[10,80,98],"that":[11,35,93,168,181],"are":[12,62,68],"most":[13,60],"likely":[14],"be":[16],"vulnerable.":[17],"Typically,":[18],"this":[19,111],"is":[21],"tackled":[22],"by":[23,112,147],"training":[24],"binary":[25],"classifiers":[26,72],"on":[27,153],"historical":[28,57,166],"data.":[29,143],"Unfortunately,":[30],"recent":[31],"research":[32],"has":[33],"shown":[34],"such":[36,193],"approaches":[37],"underperform":[38],"due":[39],"following":[42],"two":[43],"reasons:":[44],"a)":[45],"imbalanced":[47],"nature":[48],"problem,":[51],"and":[52,103,117,141,163,210,229],"b)":[53],"inherently":[55],"noisy":[56,140],"data,":[58],"i.e.,":[59,133,159],"vulnerabilities":[61,167],"discovered":[63],"much":[64],"later":[65],"than":[66,100],"they":[67,74],"introduced.":[69],"This":[70,122],"misleads":[71],"as":[73,81,106,194],"learn":[75,127],"recognize":[77],"actual":[78],"vulnerable":[79,97,102],"non-vulnerable.":[82],"To":[83],"tackle":[84],"these":[85],"issues,":[86],"we":[87,131],"propose":[88],"TROVON":[89,124,146,186],",":[90,197,199,202,205,207,209,228],"a":[91],"technique":[92],"learns":[94],"from":[95,101,128],"known":[96,115],"rather":[99],"non-vulnerable":[104],"components,":[105],"typically":[107],"performed.":[108],"We":[109,144],"perform":[110],"contrasting":[113],"vulnerable,":[116],"their":[118],"respective":[119],"fixed":[120],"components.":[121],"way,":[123],"manages":[125],"things":[130],"know,":[132],"vulnerabilities,":[134],"hence":[135],"reducing":[136],"effects":[138],"unbalanced":[142],"evaluate":[145],"comparing":[148],"it":[149],"with":[150,165,212],"existing":[151,189],"techniques":[152,192],"three":[154],"security-critical":[155],"open":[156],"source":[157],"systems,":[158],"Linux":[160],"Kernel,":[161],"OpenSSL,":[162],"Wireshark,":[164],"have":[169],"been":[170],"reported":[171],"in":[172,217],"National":[174],"Database":[176],"(NVD).":[177],"Our":[178],"evaluation":[179],"demonstrates":[180],"capability":[184],"significantly":[187],"outperforms":[188],"vulnerability":[190],"Software":[195],"Metrics":[196],"Imports":[198],"Function":[200],"Calls":[201],"Text":[203],"Mining":[204],"Devign":[206],"LSTM":[208],"LSTM-RF":[211],"an":[213,230],"improvement":[214,231],"40.84%":[216],"Matthews":[218],"Correlation":[219],"Coefficient":[220],"(MCC)":[221],"score":[222],"under":[223,234],"Clean":[224],"Training":[225,236],"Data":[226,237],"Settings":[227,238],"35.52%":[233],"Realistic":[235],".":[239]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":8}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
