{"id":"https://openalex.org/W4312690534","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892280","title":"VulBERTa: Simplified Source Code Pre-Training for Vulnerability Detection","display_name":"VulBERTa: Simplified Source Code Pre-Training for Vulnerability Detection","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4312690534","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892280"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892280","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892280","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078674959","display_name":"Hazim Hanif","orcid":"https://orcid.org/0000-0002-9140-6625"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]},{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB","MY"],"is_corresponding":false,"raw_author_name":"Hazim Hanif","raw_affiliation_strings":["Imperial College,Department of Computing,London,UK","Department of Computing Imperial College London, UK;","Faculty of Computer Science and Information Technology, University of Malaya, Malaysia","Department of Computing Imperial College London, UK"],"affiliations":[{"raw_affiliation_string":"Imperial College,Department of Computing,London,UK","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Department of Computing Imperial College London, UK;","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Faculty of Computer Science and Information Technology, University of Malaya, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Department of Computing Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043151499","display_name":"Sergio Maffeis","orcid":"https://orcid.org/0000-0003-1514-6857"},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]},{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["GB","MY"],"is_corresponding":true,"raw_author_name":"Sergio Maffeis","raw_affiliation_strings":["University of Malaya,Faculty of Computer Science and Information Technology,Malaysia","Department of Computing Imperial College London, UK;","Department of Computing Imperial College London, UK","Faculty of Computer Science and Information Technology, University of Malaya, Malaysia"],"affiliations":[{"raw_affiliation_string":"University of Malaya,Faculty of Computer Science and Information Technology,Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Department of Computing Imperial College London, UK;","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Department of Computing Imperial College London, UK","institution_ids":["https://openalex.org/I47508984"]},{"raw_affiliation_string":"Faculty of Computer Science and Information Technology, University of Malaya, Malaysia","institution_ids":["https://openalex.org/I33849332"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5043151499"],"corresponding_institution_ids":["https://openalex.org/I33849332","https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":18.677,"has_fulltext":false,"cited_by_count":138,"citation_normalized_percentile":{"value":0.99528302,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8231282830238342},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6686510443687439},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.6092228889465332},{"id":"https://openalex.org/keywords/train","display_name":"Train","score":0.518793523311615},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5156654119491577},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5109567046165466},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4965091347694397},{"id":"https://openalex.org/keywords/simplicity","display_name":"Simplicity","score":0.48481717705726624},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4823436439037323},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.46566155552864075},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.4301331043243408},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41291293501853943},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3481348752975464},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17965799570083618},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.08220729231834412}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8231282830238342},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6686510443687439},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.6092228889465332},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.518793523311615},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5156654119491577},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5109567046165466},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4965091347694397},{"id":"https://openalex.org/C2776372474","wikidata":"https://www.wikidata.org/wiki/Q508291","display_name":"Simplicity","level":2,"score":0.48481717705726624},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4823436439037323},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.46566155552864075},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.4301331043243408},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41291293501853943},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3481348752975464},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17965799570083618},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.08220729231834412},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892280","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892280","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.46000000834465027,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W46679369","https://openalex.org/W1832693441","https://openalex.org/W1992114977","https://openalex.org/W2069268700","https://openalex.org/W2079735306","https://openalex.org/W2559874352","https://openalex.org/W2770225980","https://openalex.org/W2781491433","https://openalex.org/W2885030880","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2962960733","https://openalex.org/W2965373594","https://openalex.org/W2970971581","https://openalex.org/W2972135640","https://openalex.org/W2996428491","https://openalex.org/W3035882142","https://openalex.org/W3098605233","https://openalex.org/W3101228802","https://openalex.org/W3119507053","https://openalex.org/W3122890974","https://openalex.org/W3126675481","https://openalex.org/W3127736190","https://openalex.org/W3127782461","https://openalex.org/W3137781054","https://openalex.org/W3161071537","https://openalex.org/W3163206498","https://openalex.org/W3163521353","https://openalex.org/W3166095789","https://openalex.org/W3170092793","https://openalex.org/W3173063387","https://openalex.org/W3183962691","https://openalex.org/W3196620981","https://openalex.org/W4287328196","https://openalex.org/W4288076143","https://openalex.org/W4295312788","https://openalex.org/W4385245566","https://openalex.org/W6601894380","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6767260250","https://openalex.org/W6768021236","https://openalex.org/W6779068807","https://openalex.org/W6783197149","https://openalex.org/W6790588633","https://openalex.org/W6791743870"],"related_works":["https://openalex.org/W4380075502","https://openalex.org/W4223943233","https://openalex.org/W4312200629","https://openalex.org/W4360585206","https://openalex.org/W4364306694","https://openalex.org/W4380086463","https://openalex.org/W4225161397","https://openalex.org/W3014300295","https://openalex.org/W3164822677","https://openalex.org/W2795261237"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"VulBERTa,":[3],"a":[4,18,22,36],"deep":[5,37],"learning":[6],"approach":[7,16,57],"to":[8,49],"detect":[9],"security":[10],"vulnerabilities":[11],"in":[12,101],"source":[13],"code.":[14],"Our":[15],"pre-trains":[17],"RoBERTa":[19],"model":[20,34,111],"with":[21],"custom":[23],"tokenisation":[24],"pipeline":[25],"on":[26,58],"real-world":[27],"code":[28,42],"from":[29],"open-source":[30],"C/C++":[31],"projects.":[32],"The":[33,78],"learns":[35],"knowledge":[38],"representation":[39],"of":[40,103,105,110],"the":[41],"syntax":[43],"and":[44,60,71,73,76,87,98,108],"semantics,":[45],"which":[46],"we":[47],"leverage":[48],"train":[50],"vulnerability":[51,62],"detection":[52,63],"classifiers.":[53],"We":[54],"evaluate":[55],"our":[56],"binary":[59],"multi-class":[61],"tasks":[64],"across":[65,91],"several":[66],"datasets":[67],"(Vuldeepecker,":[68],"Draper,":[69],"REVEAL":[70],"muVuldeepecker)":[72],"benchmarks":[74],"(CodeXGLUE":[75],"D2A).":[77],"evaluation":[79],"results":[80],"show":[81],"that":[82],"VulBERTa":[83],"achieves":[84],"state-of-the-art":[85],"performance":[86],"outperforms":[88],"existing":[89],"approaches":[90],"different":[92],"datasets,":[93],"despite":[94],"its":[95],"conceptual":[96],"simplicity,":[97],"limited":[99],"cost":[100],"terms":[102],"size":[104],"training":[106],"data":[107],"number":[109],"parameters.":[112]},"counts_by_year":[{"year":2026,"cited_by_count":10},{"year":2025,"cited_by_count":62},{"year":2024,"cited_by_count":43},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":3}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2023-01-05T00:00:00"}
