{"id":"https://openalex.org/W2584866966","doi":"https://doi.org/10.1109/bigdata.2016.7841028","title":"Automated big security text pruning and classification","display_name":"Automated big security text pruning and classification","publication_year":2016,"publication_date":"2016-12-01","ids":{"openalex":"https://openalex.org/W2584866966","doi":"https://doi.org/10.1109/bigdata.2016.7841028","mag":"2584866966"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2016.7841028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7841028","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082523735","display_name":"Khudran Alzhrani","orcid":"https://orcid.org/0000-0003-2212-0233"},"institutions":[{"id":"https://openalex.org/I888729015","display_name":"University of Colorado Colorado Springs","ror":"https://ror.org/054spjc55","country_code":"US","type":"education","lineage":["https://openalex.org/I888729015"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Khudran Alzhrani","raw_affiliation_strings":["Department of Computer Science, University of Colorado at Colorado Springs"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Colorado at Colorado Springs","institution_ids":["https://openalex.org/I888729015"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077670684","display_name":"Ethan M. Rudd","orcid":"https://orcid.org/0000-0001-8831-5514"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ethan M. Rudd","raw_affiliation_strings":["Department of Computer Science, Vision and Security Technology (VAST) Lab"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Vision and Security Technology (VAST) Lab","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102742934","display_name":"C. Edward Chow","orcid":"https://orcid.org/0000-0002-6870-2952"},"institutions":[{"id":"https://openalex.org/I888729015","display_name":"University of Colorado Colorado Springs","ror":"https://ror.org/054spjc55","country_code":"US","type":"education","lineage":["https://openalex.org/I888729015"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Edward Chow","raw_affiliation_strings":["Department of Computer Science, University of Colorado at Colorado Springs"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Colorado at Colorado Springs","institution_ids":["https://openalex.org/I888729015"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049661026","display_name":"Terrance E. Boult","orcid":"https://orcid.org/0000-0001-5007-2529"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Terrance E. Boult","raw_affiliation_strings":["Department of Computer Science, Vision and Security Technology (VAST) Lab"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Vision and Security Technology (VAST) Lab","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5082523735"],"corresponding_institution_ids":["https://openalex.org/I888729015"],"apc_list":null,"apc_paid":null,"fwci":0.7181,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.79190194,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"3629","last_page":"3637"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8261072039604187},{"id":"https://openalex.org/keywords/backup","display_name":"Backup","score":0.612701952457428},{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.6009240746498108},{"id":"https://openalex.org/keywords/encryption","display_name":"Encryption","score":0.5755614638328552},{"id":"https://openalex.org/keywords/cloud-storage","display_name":"Cloud storage","score":0.4650263786315918},{"id":"https://openalex.org/keywords/ciphertext","display_name":"Ciphertext","score":0.46308737993240356},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.45928311347961426},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4584798812866211},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.45551103353500366},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4353214502334595},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.4312141239643097},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4159741699695587},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3901669979095459},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3654104769229889},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3653036653995514},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.35419636964797974},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.346310019493103},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32572662830352783},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.18656989932060242},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10533636808395386}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8261072039604187},{"id":"https://openalex.org/C2780945871","wikidata":"https://www.wikidata.org/wiki/Q194274","display_name":"Backup","level":2,"score":0.612701952457428},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.6009240746498108},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.5755614638328552},{"id":"https://openalex.org/C2777059624","wikidata":"https://www.wikidata.org/wiki/Q914359","display_name":"Cloud storage","level":3,"score":0.4650263786315918},{"id":"https://openalex.org/C93974786","wikidata":"https://www.wikidata.org/wiki/Q1589480","display_name":"Ciphertext","level":3,"score":0.46308737993240356},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.45928311347961426},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4584798812866211},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.45551103353500366},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4353214502334595},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.4312141239643097},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4159741699695587},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3901669979095459},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3654104769229889},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3653036653995514},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.35419636964797974},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.346310019493103},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32572662830352783},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.18656989932060242},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10533636808395386}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata.2016.7841028","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2016.7841028","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W21681092","https://openalex.org/W27929844","https://openalex.org/W50596167","https://openalex.org/W1511720087","https://openalex.org/W1567649426","https://openalex.org/W1682759715","https://openalex.org/W1972008221","https://openalex.org/W2001082470","https://openalex.org/W2003115932","https://openalex.org/W2004228899","https://openalex.org/W2017648122","https://openalex.org/W2018916405","https://openalex.org/W2027876852","https://openalex.org/W2106035193","https://openalex.org/W2114296561","https://openalex.org/W2127452375","https://openalex.org/W2146341620","https://openalex.org/W2147693472","https://openalex.org/W2151915331","https://openalex.org/W2153383412","https://openalex.org/W2167460663","https://openalex.org/W2174706414","https://openalex.org/W2275530856","https://openalex.org/W2620229640","https://openalex.org/W2963272729","https://openalex.org/W3126116864","https://openalex.org/W6601113570","https://openalex.org/W6602057100","https://openalex.org/W6682087222","https://openalex.org/W6694821995"],"related_works":["https://openalex.org/W2888805565","https://openalex.org/W4312773271","https://openalex.org/W4315588616","https://openalex.org/W2769501189","https://openalex.org/W2962686197","https://openalex.org/W2207653751","https://openalex.org/W3159709618","https://openalex.org/W2611137333","https://openalex.org/W3005513013","https://openalex.org/W4389543811"],"abstract_inverted_index":{"Many":[0],"security":[1,101,132],"related":[2],"big":[3],"data":[4,85],"problems,":[5],"including":[6],"document,":[7],"traffic,":[8],"and":[9,39,56,72,88,105],"system":[10],"log":[11],"analysis":[12,14],"require":[13,40],"of":[15,21,128,131],"unstructured":[16],"text.":[17],"Consider":[18],"the":[19,51,66,124,145,151,169],"task":[20],"analyzing":[22],"company":[23],"documents":[24,142],"for":[25],"secure":[26],"storage.":[27],"Some":[28],"might":[29],"be":[30,59,63],"too":[31],"sensitive":[32,139],"to":[33,62,77,108,162,173],"put":[34],"on":[35,50,65],"a":[36,175],"public":[37],"cloud":[38,52,67],"private":[41],"storage":[42,89],"with":[43,126],"associated":[44],"backup":[45],"overhead,":[46],"some":[47,57],"may":[48,58],"safe":[49],"in":[53,68,123,168],"encrypted":[54],"form,":[55],"sufficiently":[60],"non-sensitive":[61],"stored":[64],"plain-text":[69],"without":[70],"encryption":[71],"decryption":[73],"overhead.":[74],"Being":[75],"able":[76],"make":[78],"such":[79],"categorizations":[80],"autonomously":[81],"can":[82],"significantly":[83],"strengthen":[84],"security,":[86],"organization,":[87],"efficiency.":[90],"In":[91,113],"this":[92],"paper,":[93],"we":[94,115,136],"analyze":[95],"several":[96],"base":[97,152],"machine":[98],"learning":[99],"based":[100],"risk":[102],"assessment":[103],"algorithms":[104],"develop":[106],"techniques":[107],"improve":[109,149],"upon":[110,150],"standard":[111],"algorithms.":[112],"particular,":[114],"examine":[116],"labeling":[117,120],"document":[118,125],"sensitivity,":[119],"each":[121],"paragraph":[122],"one":[127],"three":[129],"levels":[130],"risk.":[133],"For":[134],"evaluation,":[135],"use":[137],"real":[138],"texts,":[140],"from":[141,165],"leaked":[143],"by":[144],"WikiLeaks":[146],"organization.":[147],"We":[148],"models":[153],"using":[154],"probabilistic":[155],"topic":[156],"modeling":[157],"via":[158],"Latent":[159],"Dirichlet":[160],"Analysis":[161],"identify":[163],"samples":[164],"impure":[166],"subtopics":[167],"training":[170,174],"set,":[171],"prior":[172],"logistic":[176],"regression":[177],"classifier.":[178]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
