{"id":"https://openalex.org/W4402443038","doi":"https://doi.org/10.1145/3650212.3652124","title":"SCALE: Constructing Structured Natural Language Comment Trees for Software Vulnerability Detection","display_name":"SCALE: Constructing Structured Natural Language Comment Trees for Software Vulnerability Detection","publication_year":2024,"publication_date":"2024-09-11","ids":{"openalex":"https://openalex.org/W4402443038","doi":"https://doi.org/10.1145/3650212.3652124"},"language":"en","primary_location":{"id":"doi:10.1145/3650212.3652124","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3650212.3652124","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047620514","display_name":"Xin-Cheng Wen","orcid":"https://orcid.org/0000-0002-2115-9921"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xin-Cheng Wen","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-2115-9921","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061699251","display_name":"Cuiyun Gao","orcid":"https://orcid.org/0000-0003-4774-2434"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cuiyun Gao","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-4774-2434","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012760348","display_name":"Shuzheng Gao","orcid":"https://orcid.org/0000-0002-8102-480X"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Shuzheng Gao","raw_affiliation_strings":["Chinese University of Hong Kong, Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0002-8102-480X","affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100604658","display_name":"Yang Xiao","orcid":"https://orcid.org/0009-0005-8009-2252"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xiao","raw_affiliation_strings":["Chinese Academy of Sciences, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-8009-2252","affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5069596903","display_name":"Michael R. Lyu","orcid":"https://orcid.org/0000-0002-3666-5798"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Michael R. Lyu","raw_affiliation_strings":["Chinese University of Hong Kong, Hong Kong, China"],"raw_orcid":"https://orcid.org/0000-0002-3666-5798","affiliations":[{"raw_affiliation_string":"Chinese University of Hong Kong, Hong Kong, China","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5047620514"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":11.936,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.98475307,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"235","last_page":"247"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7194777727127075},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5640307068824768},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.5553674101829529},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.46378931403160095},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4164183735847473},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.35175085067749023},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3222253918647766},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2387436032295227},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.12268027663230896},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.07555210590362549},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.07140886783599854}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7194777727127075},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5640307068824768},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.5553674101829529},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.46378931403160095},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4164183735847473},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.35175085067749023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3222253918647766},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2387436032295227},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.12268027663230896},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.07555210590362549},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.07140886783599854}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3650212.3652124","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3650212.3652124","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1996567876","https://openalex.org/W2132083787","https://openalex.org/W2217433794","https://openalex.org/W2294407885","https://openalex.org/W2795170942","https://openalex.org/W2885030880","https://openalex.org/W2898502769","https://openalex.org/W3030148664","https://openalex.org/W3098605233","https://openalex.org/W3122241445","https://openalex.org/W3137781054","https://openalex.org/W4221166942","https://openalex.org/W4240627753","https://openalex.org/W4284667406","https://openalex.org/W4285821122","https://openalex.org/W4309796907","https://openalex.org/W4312436517","https://openalex.org/W4380763529","https://openalex.org/W4388483660","https://openalex.org/W4388505012"],"related_works":["https://openalex.org/W2095999892","https://openalex.org/W2018764758","https://openalex.org/W2383689843","https://openalex.org/W1550668881","https://openalex.org/W617913288","https://openalex.org/W2319323865","https://openalex.org/W2951745010","https://openalex.org/W4226226396","https://openalex.org/W3153750606","https://openalex.org/W4308854837"],"abstract_inverted_index":{"Recently,":[0],"there":[1],"has":[2],"been":[3],"a":[4,102],"growing":[5],"interest":[6],"in":[7,25,50,243],"automatic":[8],"software":[9],"vulnerability":[10,94,220],"detection.":[11,95],"Pre-trained":[12],"model-based":[13,32],"approaches":[14,24,33],"have":[15],"demonstrated":[16],"superior":[17],"performance":[18,273],"than":[19],"other":[20],"Deep":[21],"Learning":[22],"(DL)-based":[23],"detecting":[26],"vulnerabilities.":[27],"However,":[28],"the":[29,51,62,65,98,113,127,138,155,160,178,200,205,214,227,231,249,270],"existing":[30],"pre-trained":[31,114,232,262],"generally":[34],"employ":[35],"code":[36,66,86,130,133,163,195,201],"sequences":[37,135],"as":[38,48,72,265],"input":[39],"during":[40],"prediction,":[41],"and":[42,77,175,234,241,252,267],"may":[43],"ignore":[44],"vulnerability-related":[45],"structural":[46],"information,":[47],"reflected":[49],"following":[52],"two":[53],"aspects.":[54],"First,":[55],"they":[56,80],"tend":[57],"to":[58,60,83,158,181,260,278],"fail":[59],"infer":[61,159],"semantics":[63,128,161],"of":[64,129,162,238,245],"statements":[67,131,164],"with":[68,132,204,236],"complex":[69],"logic":[70],"such":[71,264],"those":[73],"containing":[74],"multiple":[75],"operators":[76],"pointers.":[78],"Second,":[79],"are":[81],"hard":[82],"comprehend":[84],"various":[85],"execution":[87,134,196],"sequences,":[88],"which":[89,151,190,211],"is":[90],"essential":[91],"for":[92,172,217],"precise":[93],"To":[96],"mitigate":[97],"challenges,":[99],"we":[100],"propose":[101],"Structured":[103,120,184],"Natural":[104,121,185],"Language":[105,122,169,186],"Comment":[106,123,148,187],"tree-based":[107],"vulnerAbiLity":[108],"dEtection":[109],"framework":[110],"based":[111,136],"on":[112,137,248],"models,":[115,263],"named":[116],".":[117],"The":[118],"proposed":[119],"Tree":[124,149,188],"(SCT)":[125],"integrates":[126],"Abstract":[139],"Syntax":[140],"Trees":[141],"(ASTs).Specifically,":[142],"comprises":[143],"three":[144],"main":[145],"modules:":[146],"(1)":[147],"Construction,":[150,189],"aims":[152,191],"at":[153,192],"enhancing":[154],"model\u2019s":[156],"ability":[157],"by":[165,198],"first":[166],"incorporating":[167],"Large":[168],"Models":[170],"(LLMs)":[171],"comment":[173,179,206],"generation":[174],"then":[176],"adding":[177],"node":[180],"ASTs.":[182],"(2)":[183],"explicitly":[193],"involving":[194],"sequence":[197],"combining":[199],"syntax":[202],"templates":[203],"tree.":[207],"(3)":[208],"SCT-Enhanced":[209],"Representation,":[210],"finally":[212],"incorporates":[213],"constructed":[215],"SCTs":[216],"well":[218],"capturing":[219],"patterns.":[221],"Experimental":[222],"results":[223],"demonstrate":[224],"that":[225],"outperforms":[226],"best-performing":[228],"baseline,":[229],"including":[230],"model":[233],"LLMs,":[235],"improvements":[237],"2.96%,":[239],"13.47%,":[240],"3.75%":[242],"terms":[244],"F1":[246,271],"score":[247,272],"FFMPeg+Qemu,":[250],"Reveal,":[251],"SVulD":[253],"datasets,":[254],"respectively.":[255],"Furthermore,":[256],"can":[257],"be":[258],"applied":[259],"different":[261],"CodeBERT":[266],"UniXcoder,":[268],"yielding":[269],"enhancements":[274],"ranging":[275],"from":[276],"1.37%":[277],"10.87%.":[279]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
