{"id":"https://openalex.org/W4319736724","doi":"https://doi.org/10.1145/3579638","title":"Extraction of Phrase-based Concepts in Vulnerability Descriptions through Unsupervised Labeling","display_name":"Extraction of Phrase-based Concepts in Vulnerability Descriptions through Unsupervised Labeling","publication_year":2023,"publication_date":"2023-02-09","ids":{"openalex":"https://openalex.org/W4319736724","doi":"https://doi.org/10.1145/3579638"},"language":"en","primary_location":{"id":"doi:10.1145/3579638","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3579638","pdf_url":null,"source":{"id":"https://openalex.org/S142627899","display_name":"ACM Transactions on Software Engineering and Methodology","issn_l":"1049-331X","issn":["1049-331X","1557-7392"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Software Engineering and Methodology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009280810","display_name":"Sofonias Yitagesu","orcid":"https://orcid.org/0000-0002-9247-7521"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sofonias Yitagesu","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028641941","display_name":"Zhenchang Xing","orcid":"https://orcid.org/0000-0001-7663-1421"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Zhenchang Xing","raw_affiliation_strings":["CSIRO\u2019s Data61 and School of Computing, Australia National University, Australia","CSIRO's Data61 and School of Computing, Australia National University, Australia"],"affiliations":[{"raw_affiliation_string":"CSIRO\u2019s Data61 and School of Computing, Australia National University, Australia","institution_ids":["https://openalex.org/I42894916"]},{"raw_affiliation_string":"CSIRO's Data61 and School of Computing, Australia National University, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030524599","display_name":"Xiaowang Zhang","orcid":"https://orcid.org/0000-0002-3931-3886"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowang Zhang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100736532","display_name":"Zhiyong Feng","orcid":"https://orcid.org/0000-0001-8158-7453"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Feng","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451509","display_name":"Xiaohong Li","orcid":"https://orcid.org/0000-0002-0752-6764"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaohong Li","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041328957","display_name":"Linyi Han","orcid":"https://orcid.org/0000-0002-9747-4426"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linyi Han","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5009280810"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":4.983,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.95317726,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"32","issue":"5","first_page":"1","last_page":"45"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9818999767303467,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9757999777793884,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8966854810714722},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6622872948646545},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5743080377578735},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5097064971923828},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.48554012179374695},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.46959415078163147},{"id":"https://openalex.org/keywords/phrase","display_name":"Phrase","score":0.4275953471660614},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.41895365715026855},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.10242247581481934}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8966854810714722},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6622872948646545},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5743080377578735},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5097064971923828},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.48554012179374695},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.46959415078163147},{"id":"https://openalex.org/C2776224158","wikidata":"https://www.wikidata.org/wiki/Q187931","display_name":"Phrase","level":2,"score":0.4275953471660614},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.41895365715026855},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.10242247581481934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3579638","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3579638","pdf_url":null,"source":{"id":"https://openalex.org/S142627899","display_name":"ACM Transactions on Software Engineering and Methodology","issn_l":"1049-331X","issn":["1049-331X","1557-7392"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Software Engineering and Methodology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.4300000071525574,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4304333080","display_name":null,"funder_award_id":"61972455","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W10548402","https://openalex.org/W579397108","https://openalex.org/W1976635144","https://openalex.org/W2033599040","https://openalex.org/W2187089797","https://openalex.org/W2343954916","https://openalex.org/W2527802371","https://openalex.org/W2531563875","https://openalex.org/W2597601064","https://openalex.org/W2602856279","https://openalex.org/W2911254434","https://openalex.org/W2911479648","https://openalex.org/W2913668833","https://openalex.org/W2940125701","https://openalex.org/W2948226814","https://openalex.org/W2959224760","https://openalex.org/W2962739339","https://openalex.org/W2970557265","https://openalex.org/W2974159469","https://openalex.org/W3023882301","https://openalex.org/W3093190191","https://openalex.org/W3098605233","https://openalex.org/W3105146463","https://openalex.org/W3171557651","https://openalex.org/W3205997337","https://openalex.org/W4205509257","https://openalex.org/W4206299091","https://openalex.org/W4239319433","https://openalex.org/W4294214797","https://openalex.org/W6757741565","https://openalex.org/W6802327984","https://openalex.org/W6819575453"],"related_works":["https://openalex.org/W2502722637","https://openalex.org/W2250591306","https://openalex.org/W2167662847","https://openalex.org/W1551406738","https://openalex.org/W1794016765","https://openalex.org/W3186232876","https://openalex.org/W1659887931","https://openalex.org/W2293457016","https://openalex.org/W2977842567","https://openalex.org/W2369308426"],"abstract_inverted_index":{"Software":[0],"vulnerabilities,":[1],"once":[2],"disclosed,":[3],"can":[4,378,403],"be":[5,379,404],"documented":[6],"in":[7,29,53,89,153,164,202,351,369,386],"vulnerability":[8,16,50,59,87,91,101,349,363,376,397,449],"databases,":[9],"which":[10,334,390],"have":[11],"great":[12],"potential":[13],"to":[14,68,82,121,182,197,212,218,269,318,382,406,430,466],"advance":[15],"analysis":[17,48],"and":[18,36,84,112,123,133,156,206,238,245,262,265,296,309,344,426,470],"security":[19,220],"research.":[20],"People":[21],"describe":[22],"the":[23,46,144,165,178,191,203,207,219,226,231,252,276,284,304,322,329,345,352,383,387,432,435,456,467],"key":[24,58,141,253],"characteristics":[25],"of":[26,49,57,99,146,243,279,307,321,324,331,348,362,393,396,434],"software":[27],"vulnerabilities":[28],"natural":[30,235,246],"language":[31,236,247],"mixed":[32],"with":[33,289,445,455],"domain-specific":[34],"names":[35],"concepts.":[37,325,398,438],"This":[38],"textual":[39,90],"nature":[40,278],"poses":[41],"a":[42,140,172,184,240,299,336,366],"significant":[43,66],"challenge":[44],"for":[45,72,292,340,411],"automatic":[47],"knowledge":[51],"embedded":[52],"text.":[54],"Automatic":[55],"extraction":[56,409,422],"aspects":[60],"is":[61,137,195,210],"highly":[62],"desirable":[63],"but":[64],"demands":[65],"effort":[67],"manually":[69,458],"label":[70,83,122,359],"data":[71,294],"model":[73,194,209],"training.":[74],"In":[75,303,415],"this":[76,416],"article,":[77],"we":[78,170,255,282,312,418],"propose":[79,256],"unsupervised":[80,315,371,474],"methods":[81,423],"extract":[85,124],"important":[86],"concepts":[88,102,350,364,377,450],"descriptions":[92],"(TVDs).":[93],"We":[94],"focus":[95],"on":[96,139,234,251],"six":[97,360,394],"types":[98,361,395],"phrase-based":[100],"(vulnerability":[103],"type,":[104,110],"vulnerable":[105],"component,":[106],"root":[107],"cause,":[108],"attacker":[109],"impact,":[111],"attack":[113],"vector)":[114],"as":[115],"they":[116,151],"are":[117],"much":[118],"more":[119],"difficult":[120],"than":[125],"name-":[126],"or":[127],"number-based":[128],"entities":[129],"(i.e.,":[130],"vendor,":[131],"product,":[132],"version).":[134],"Our":[135,222,326,439],"approach":[136],"based":[138,250],"observation":[142],"that":[143,176,225,442],"same-type":[145,323],"phrases,":[147],"no":[148],"matter":[149],"how":[150],"differ":[152],"sentence":[154,166],"structures":[155],"phrase":[157],"expressions,":[158],"usually":[159],"share":[160],"syntactically":[161],"similar":[162],"paths":[163,261],"parsing":[167],"trees.":[168],"Specifically,":[169],"present":[171,419],"source-target":[173],"neural":[174,193],"architecture":[175],"learns":[177],"Part-of-Speech":[179],"(POS)":[180],"tagging":[181],"identify":[183,213],"token\u2019s":[185],"functional":[186],"role":[187],"within":[188],"TVDs,":[189,389],"where":[190],"source":[192],"trained":[196,211,444,454],"capture":[198],"common":[199],"features":[200],"found":[201],"TVD":[204,367,413,460],"corpus,":[205],"target":[208],"linguistically":[214],"malformed":[215],"words":[216],"specific":[217],"domain.":[221],"evaluation":[223,327],"confirms":[224,328],"proposed":[227],"tagger":[228],"outperforms":[229],"(4.45%\u20135.98%)":[230],"taggers":[232],"designed":[233],"notions":[237],"identifies":[239],"broad":[241],"set":[242],"TVDs":[244,402],"contents.":[248],"Then,":[249],"observations,":[254],"two":[257,420,457],"path":[258,342],"representations":[259,343],"(absolute":[260],"relative":[263,310],"paths)":[264],"use":[266],"an":[267,370],"auto-encoder":[268],"encode":[270],"such":[271],"syntactic":[272],"similarities.":[273],"To":[274],"address":[275],"discrete":[277],"our":[280,332,446,473],"paths,":[281,311],"enhance":[283],"traditional":[285],"Variational":[286],"Auto-encoder":[287],"(VAE)":[288],"Gumble-Max":[290],"trick":[291],"categorical":[293],"distribution":[295],"thus":[297],"create":[298],"Categorical":[300],"VAE":[301],"(CaVAE).":[302],"latent":[305],"space":[306],"absolute":[308],"further":[313],"apply":[314],"clustering":[316],"techniques":[317],"generate":[319],"clusters":[320,357],"effectiveness":[330],"CaVAE,":[333],"achieves":[335],"small":[337],"(85.85)":[338],"log-likelihood":[339],"encoding":[341],"accuracy":[346],"(83%\u201389%)":[347],"resulting":[353,356,400],"clusters.":[354],"The":[355,399],"accurately":[358],"from":[365,462],"corpus":[368],"way.":[372],"Furthermore,":[373],"these":[374],"labeled":[375,401,437,448,459],"mapped":[380],"back":[381],"corresponding":[384],"phrases":[385],"original":[388],"produce":[391],"labels":[392],"used":[405],"train":[407],"concept":[408,421],"models":[410,443],"other":[412],"corpora.":[414],"work,":[417],"(concept":[424],"classification":[425],"sequence":[427],"labeling":[428,475],"model)":[429],"demonstrate":[431],"utility":[433],"unsupervisedly":[436,447],"study":[440],"shows":[441],"outperform":[451],"(3.9%\u20135.14%)":[452],"those":[453],"datasets":[461],"previous":[463],"work":[464],"due":[465],"consistent":[468],"boundary":[469],"typing":[471],"by":[472],"method.":[476]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
