{"id":"https://openalex.org/W4392021591","doi":"https://doi.org/10.1007/s10664-023-10405-9","title":"Analyzing source code vulnerabilities in the D2A dataset with ML ensembles and C-BERT","display_name":"Analyzing source code vulnerabilities in the D2A dataset with ML ensembles and C-BERT","publication_year":2024,"publication_date":"2024-02-22","ids":{"openalex":"https://openalex.org/W4392021591","doi":"https://doi.org/10.1007/s10664-023-10405-9"},"language":"en","primary_location":{"id":"doi:10.1007/s10664-023-10405-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-023-10405-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-023-10405-9.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10664-023-10405-9.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086021292","display_name":"Saurabh Pujar","orcid":"https://orcid.org/0000-0002-9772-3162"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Saurabh Pujar","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":"https://orcid.org/0000-0002-9772-3162","affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039824895","display_name":"Yunhui Zheng","orcid":"https://orcid.org/0000-0002-6794-3199"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yunhui Zheng","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022631353","display_name":"Luca Buratti","orcid":"https://orcid.org/0009-0007-1468-9995"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Luca Buratti","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014748574","display_name":"Burn Lewis","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Burn Lewis","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004718749","display_name":"Y. Q. Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]},{"id":"https://openalex.org/I99613584","display_name":"National Taipei University","ror":"https://ror.org/03e29r284","country_code":"TW","type":"education","lineage":["https://openalex.org/I99613584"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yunchung Chen","raw_affiliation_strings":["National Taiwan University, Taipei City, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taipei City, Taiwan","institution_ids":["https://openalex.org/I99613584","https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090845035","display_name":"Jim Laredo","orcid":"https://orcid.org/0000-0002-4915-0304"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jim Laredo","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076968229","display_name":"Alessandro Morari","orcid":"https://orcid.org/0009-0005-5006-8817"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alessandro Morari","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102708148","display_name":"Edward S. Epstein","orcid":"https://orcid.org/0009-0005-0592-5824"},"institutions":[{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward Epstein","raw_affiliation_strings":["IBM T. J. Watson Research Center, Yorktown Heights, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM T. J. Watson Research Center, Yorktown Heights, NY, USA","institution_ids":["https://openalex.org/I4210114115"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039482466","display_name":"Tsung-Nan Lin","orcid":"https://orcid.org/0000-0001-5659-1194"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]},{"id":"https://openalex.org/I99613584","display_name":"National Taipei University","ror":"https://ror.org/03e29r284","country_code":"TW","type":"education","lineage":["https://openalex.org/I99613584"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Tsungnan Lin","raw_affiliation_strings":["National Taiwan University, Taipei City, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taipei City, Taiwan","institution_ids":["https://openalex.org/I99613584","https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102728060","display_name":"Bo Yang","orcid":"https://orcid.org/0000-0003-1766-4025"},"institutions":[{"id":"https://openalex.org/I4210126794","display_name":"IBM Research (China)","ror":"https://ror.org/02yg1pf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126794"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Yang","raw_affiliation_strings":["IBM Research, Bejing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research, Bejing, China","institution_ids":["https://openalex.org/I4210126794"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100760738","display_name":"Zhongqing Su","orcid":"https://orcid.org/0000-0003-1524-0026"},"institutions":[{"id":"https://openalex.org/I4210126794","display_name":"IBM Research (China)","ror":"https://ror.org/02yg1pf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126794"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhong Su","raw_affiliation_strings":["IBM Research, Bejing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research, Bejing, China","institution_ids":["https://openalex.org/I4210126794"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5086021292"],"corresponding_institution_ids":["https://openalex.org/I4210114115"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":1.8255,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.84698527,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"29","issue":"2","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.552405059337616},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5521963834762573},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.4486178159713745},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10345298051834106}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.552405059337616},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5521963834762573},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4486178159713745},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10345298051834106},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10664-023-10405-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-023-10405-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-023-10405-9.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10664-023-10405-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10664-023-10405-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10664-023-10405-9.pdf","source":{"id":"https://openalex.org/S109852484","display_name":"Empirical Software Engineering","issn_l":"1382-3256","issn":["1382-3256","1573-7616"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Empirical Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392021591.pdf"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W28412257","https://openalex.org/W1495368328","https://openalex.org/W1563577331","https://openalex.org/W1771830246","https://openalex.org/W1964731418","https://openalex.org/W1981276685","https://openalex.org/W1988918904","https://openalex.org/W1989657183","https://openalex.org/W1992012690","https://openalex.org/W1992114977","https://openalex.org/W2021616144","https://openalex.org/W2119648923","https://openalex.org/W2129065328","https://openalex.org/W2132464057","https://openalex.org/W2148357053","https://openalex.org/W2155735696","https://openalex.org/W2159613309","https://openalex.org/W2161443453","https://openalex.org/W2295598076","https://openalex.org/W2561266335","https://openalex.org/W2592125937","https://openalex.org/W2624094989","https://openalex.org/W2624308770","https://openalex.org/W2735834919","https://openalex.org/W2781491433","https://openalex.org/W2798352717","https://openalex.org/W2808620986","https://openalex.org/W2954121408","https://openalex.org/W2962960733","https://openalex.org/W2963250244","https://openalex.org/W2963764936","https://openalex.org/W2963935794","https://openalex.org/W3033777149","https://openalex.org/W3098605233","https://openalex.org/W3101228802","https://openalex.org/W3107793421","https://openalex.org/W3146720657","https://openalex.org/W3163206498","https://openalex.org/W3198685994","https://openalex.org/W4233410239","https://openalex.org/W4244945045","https://openalex.org/W4285210829","https://openalex.org/W4285716886"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"Abstract":[0],"Static":[1],"analysis":[2,28,99,108,147],"tools":[3,29],"are":[4,30,171],"widely":[5],"used":[6,219],"for":[7,70,205],"vulnerability":[8,71,206],"detection":[9],"as":[10,78],"they":[11,170],"can":[12,217],"analyze":[13],"programs":[14],"with":[15,113],"complex":[16],"behavior":[17],"and":[18,84,86,143,152,201,240,250,261,282],"millions":[19],"of":[20,22,36,42,55],"lines":[21],"code.":[23,89],"Despite":[24],"their":[25],"popularity,":[26],"static":[27,62,107,146,234],"known":[31],"to":[32,46,61,67,102,174,187,220,224,252],"generate":[33,188],"an":[34],"excess":[35],"false":[37,57,227],"positives.":[38],"The":[39,110,121],"recent":[40],"ability":[41],"Machine":[43],"Learning":[44],"models":[45,69,200,204],"learn":[47],"from":[48,74,130,284],"programming":[49],"language":[50],"data":[51],"opens":[52],"new":[53],"possibilities":[54],"reducing":[56],"positives":[58,244],"when":[59],"applied":[60],"analysis.":[63],"However,":[64],"existing":[65],"datasets":[66],"train":[68,195],"identification":[72,207],"suffer":[73],"multiple":[75,131],"limitations":[76],"such":[77,154],"limited":[79,82],"bug":[80,140],"context,":[81],"size,":[83],"synthetic":[85],"unrealistic":[87],"source":[88,133],"We":[90,184,193,212,266],"propose":[91],"Differential":[92],"Dataset":[93],"Analysis":[94],"or":[95],"D2A,":[96],"a":[97,161,189,222,270],"differential":[98],"based":[100,272],"approach":[101,115],"label":[103],"issues":[104,158,231],"reported":[105,232],"by":[106,126,181,233],"tools.":[109],"dataset":[111,123,216,258,263],"built":[112,125],"this":[114],"is":[116,124],"called":[117],"the":[118,149,166,182,209,215,230,253,257,262,274,285],"D2A":[119,122,186,210,275],"dataset.":[120,192,211],"analyzing":[127],"version":[128,163],"pairs":[129],"open":[132],"projects.":[134],"From":[135],"each":[136],"project,":[137],"we":[138,144,255],"select":[139],"fixing":[141],"commits":[142],"run":[145],"on":[148,273],"versions":[150],"before":[151],"after":[153],"commits.":[155],"If":[156],"some":[157],"detected":[159],"in":[160,165],"before-commit":[162],"disappear":[164],"corresponding":[167],"after-commit":[168],"version,":[169],"very":[172],"likely":[173],"be":[175,218],"real":[176],"bugs":[177],"that":[178,214],"got":[179],"fixed":[180],"commit.":[183],"use":[185],"large":[190],"labeled":[191],"then":[194],"both":[196],"classic":[197],"machine":[198],"learning":[199,203],"deep":[202],"using":[208],"show":[213],"build":[221],"classifier":[223],"identify":[225],"possible":[226],"alarms":[228],"among":[229],"analysis,":[235],"hence":[236],"helping":[237],"developers":[238],"prioritize":[239],"investigate":[241],"potential":[242],"true":[243],"first.":[245],"To":[246],"facilitate":[247],"future":[248],"research":[249],"contribute":[251],"community,":[254],"make":[256],"generation":[259],"pipeline":[260],"publicly":[264],"available.":[265],"have":[267],"also":[268],"created":[269],"leaderboard":[271],"dataset,":[276],"which":[277],"has":[278],"already":[279],"attracted":[280],"attention":[281],"participation":[283],"community.":[286]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-13T06:13:01.061226","created_date":"2025-10-10T00:00:00"}
