{"id":"https://openalex.org/W4402438184","doi":"https://doi.org/10.1145/3674029.3674067","title":"Learning the Structure of Commands by Retraining a Language Model","display_name":"Learning the Structure of Commands by Retraining a Language Model","publication_year":2024,"publication_date":"2024-05-24","ids":{"openalex":"https://openalex.org/W4402438184","doi":"https://doi.org/10.1145/3674029.3674067"},"language":"en","primary_location":{"id":"doi:10.1145/3674029.3674067","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3674029.3674067","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 9th International Conference on Machine Learning Technologies (ICMLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3674029.3674067","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054700321","display_name":"Zafar Hussain","orcid":"https://orcid.org/0000-0002-0717-8181"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Zafar Hussain","raw_affiliation_strings":["Computer Science, University of Helsinki, Finland"],"raw_orcid":"https://orcid.org/0000-0002-0717-8181","affiliations":[{"raw_affiliation_string":"Computer Science, University of Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006567200","display_name":"Lalli Myllyaho","orcid":"https://orcid.org/0000-0002-0953-9825"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Lalli Myllyaho","raw_affiliation_strings":["Computer Science, University of Helsinki, Finland"],"raw_orcid":"https://orcid.org/0000-0002-0953-9825","affiliations":[{"raw_affiliation_string":"Computer Science, University of Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042260991","display_name":"Jukka K. Nurminen","orcid":"https://orcid.org/0000-0001-5083-1927"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Jukka K. Nurminen","raw_affiliation_strings":["Computer Science, University of Helsinki, Finland"],"raw_orcid":"https://orcid.org/0000-0001-5083-1927","affiliations":[{"raw_affiliation_string":"Computer Science, University of Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5054700321"],"corresponding_institution_ids":["https://openalex.org/I133731052"],"apc_list":null,"apc_paid":null,"fwci":0.3364,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.60174106,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"239","last_page":"244"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9876999855041504,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10820","display_name":"Fuzzy Logic and Control Systems","score":0.9833999872207642,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.833823561668396},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7331132888793945},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46091726422309875},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4482099711894989},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.42721959948539734},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.34380823373794556}],"concepts":[{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.833823561668396},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7331132888793945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46091726422309875},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4482099711894989},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.42721959948539734},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34380823373794556},{"id":"https://openalex.org/C155202549","wikidata":"https://www.wikidata.org/wiki/Q178803","display_name":"International trade","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3674029.3674067","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3674029.3674067","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 9th International Conference on Machine Learning Technologies (ICMLT)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3674029.3674067","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3674029.3674067","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 9th International Conference on Machine Learning Technologies (ICMLT)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1987971958","https://openalex.org/W1997037234","https://openalex.org/W2005662348","https://openalex.org/W2051224630","https://openalex.org/W2079309933","https://openalex.org/W2085487226","https://openalex.org/W2119463329","https://openalex.org/W2491695530","https://openalex.org/W2811163328","https://openalex.org/W3160137267","https://openalex.org/W3199174176","https://openalex.org/W4327503230","https://openalex.org/W4382200999","https://openalex.org/W4382322607"],"related_works":["https://openalex.org/W2081982437","https://openalex.org/W4394857231","https://openalex.org/W2027050655","https://openalex.org/W3028244590","https://openalex.org/W4254349500","https://openalex.org/W2014369232","https://openalex.org/W3122042562","https://openalex.org/W2050078012","https://openalex.org/W2060761133","https://openalex.org/W3204019825"],"abstract_inverted_index":{"In":[0,139],"the":[1,6,21,69,94,129,159],"field":[2],"of":[3,23,31,108,162],"cybersecurity,":[4],"learning":[5,136],"command-line":[7,24,34,86,163],"commands\u2019":[8,164],"syntax":[9,22,102],"holds":[10],"paramount":[11],"importance":[12],"in":[13,97,135,147],"distinguishing":[14],"valid":[15],"and":[16,40,62,80,100,123,133],"malicious":[17,152],"commands.":[18],"To":[19],"learn":[20],"commands,":[25,35],"we":[26],"curated":[27],"an":[28],"extensive":[29],"dataset":[30],"Windows":[32],"10":[33],"developed":[36],"a":[37,42,47,58,63],"specialized":[38],"vocabulary,":[39],"trained":[41],"custom":[43],"tokenizer":[44],"equipped":[45],"with":[46,103],"masked":[48],"language":[49,70,142],"model":[50,143],"head.":[51],"Comparative":[52],"analyses":[53,107],"against":[54,151],"traditional":[55],"methods,":[56],"including":[57],"second-order":[59],"Markov":[60],"Model":[61],"Regular":[64],"Expression-based":[65],"system,":[66],"unequivocally":[67],"demonstrated":[68],"model\u2019s":[71,95,130],"superior":[72],"proficiency.":[73],"Employing":[74],"clustering":[75],"algorithms":[76],"like":[77],"DBSCAN,":[78],"HDBSCAN,":[79],"OPTICS":[81],"allowed":[82],"us":[83],"to":[84,158],"categorize":[85],"commands":[87],"based":[88],"on":[89],"their":[90],"syntactical":[91],"similarities,":[92],"revealing":[93],"excellence":[96],"understanding":[98],"sequences":[99],"detecting":[101],"minimal":[104],"noise.":[105],"Manual":[106],"command":[109,137],"syntax,":[110],"complemented":[111],"by":[112],"BERTScore":[113],"assessments,":[114],"consistently":[115],"yielded":[116],"metrics":[117],"exceeding":[118],"0.90":[119],"for":[120],"precision,":[121],"recall,":[122],"F1-score.":[124],"These":[125],"robust":[126],"results":[127],"affirm":[128],"high":[131],"accuracy":[132],"effectiveness":[134],"syntax.":[138,165],"conclusion,":[140],"our":[141],"not":[144],"only":[145],"helps":[146],"enhancing":[148],"protective":[149],"measures":[150],"activities":[153],"but":[154],"also":[155],"showcases":[156],"adaptability":[157],"ever-evolving":[160],"nature":[161]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-23T23:11:35.936235","created_date":"2025-10-10T00:00:00"}
