{"id":"https://openalex.org/W6907186091","doi":"https://doi.org/10.21227/3gnj-b288","title":"\"LLM Finetuning Q&A - Cybersecurity, IT Support Dataset\"","display_name":"\"LLM Finetuning Q&A - Cybersecurity, IT Support Dataset\"","publication_year":2025,"publication_date":"2025-08-09","ids":{"openalex":"https://openalex.org/W6907186091","doi":"https://doi.org/10.21227/3gnj-b288"},"language":"en","primary_location":{"id":"doi:10.21227/3gnj-b288","is_oa":true,"landing_page_url":"https://doi.org/10.21227/3gnj-b288","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.21227/3gnj-b288","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chaithanya Vamshi Sai","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chaithanya Vamshi Sai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Nouh Sabri  Elmitwally","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nouh Sabri  Elmitwally","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Iain  Rice","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Iain  Rice","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Haitham  Mahmoud","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haitham  Mahmoud","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ian  Vickers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ian  Vickers","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Xavier  Schmoor","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xavier  Schmoor","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.4823000133037567,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.4823000133037567,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.24369999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.02280000038444996,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cloud-computing","display_name":"Cloud computing","score":0.5965999960899353},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5759000182151794},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.5164999961853027},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.45570001006126404},{"id":"https://openalex.org/keywords/raw-data","display_name":"Raw data","score":0.4117000102996826},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4108000099658966},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.40119999647140503}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7868000268936157},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5759000182151794},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.5164999961853027},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5090000033378601},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.45570001006126404},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.44449999928474426},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.4117000102996826},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4108000099658966},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.40119999647140503},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.34290000796318054},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3174999952316284},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C2776240099","wikidata":"https://www.wikidata.org/wiki/Q327018","display_name":"Interrogation","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.30480000376701355},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.2874000072479248},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C2776639384","wikidata":"https://www.wikidata.org/wiki/Q840396","display_name":"Ideal (ethics)","level":2,"score":0.26100000739097595}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21227/3gnj-b288","is_oa":true,"landing_page_url":"https://doi.org/10.21227/3gnj-b288","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.21227/3gnj-b288","is_oa":true,"landing_page_url":"https://doi.org/10.21227/3gnj-b288","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"\"LLM":[0],"Finetuning":[1],"Q&amp;A":[2],"-":[3],"Cybersecurity,":[4],"IT":[5,28,169,280],"Support":[6],"DatasetA":[7],"comprehensive,":[8],"high-quality":[9],"dataset":[10,34,52,126,256,269,299],"of":[11,68,124,165,245,291,327],"11,000":[12],"question-and-answer":[13,183],"(Q&amp;A)":[14],"pairs":[15],"designed":[16,101],"to":[17,84,102,110,191,241,321],"fine-tune":[18],"large":[19],"language":[20],"models":[21],"(LLMs)":[22],"for":[23,37,107,310],"specialised":[24,121],"domains":[25],"like":[26,74,231],"cybersecurity,":[27],"support,":[29],"and":[30,39,45,49,61,79,96,112,115,147,157,168,197,221,260,279,289],"cloud":[31,148],"computing.":[32],"This":[33,185,247,282,317],"is":[35,53,76,81,91,100,300],"ideal":[36],"researchers":[38],"developers":[40],"working":[41],"on":[42],"building":[43],"sophisticated":[44],"accurate":[46],"conversational":[47],"AI":[48,329],"question-answering":[50],"systems.The":[51],"meticulously":[54],"structured":[55],"with":[56,294],"two":[57],"primary":[58],"columns:":[59],"\\\"Question\\\"":[60],"\\\"Answer.\\\"":[62],"It":[63],"addresses":[64],"a":[65,77,104,128,144,187,236,271,303],"wide":[66],"array":[67],"topics,":[69],"from":[70,136,142,208,214],"fundamental":[71],"cybersecurity":[72,146,166,277],"concepts":[73],"\\\"What":[75,90],"VPN":[78],"why":[80],"it":[82,307],"used?\\\"":[83],"more":[85],"technical":[86,199,287],"inquiries":[87],"such":[88],"as":[89],"the":[92,202,255,267,286,292,325],"difference":[93],"between":[94],"IDS":[95],"IPS?\\\".":[97],"The":[98],"content":[99],"be":[103],"valuable":[105],"resource":[106],"training":[108,315],"LLMs":[109,230],"comprehend":[111],"generate":[113],"nuanced":[114],"contextually":[116],"relevant":[117],"responses":[118],"in":[119,253,302,324],"highly":[120],"fields.The":[122],"creation":[123],"this":[125,174],"was":[127,134,206,226,251],"multi-faceted":[129],"process.":[130],"Initially,":[131],"real-world":[132],"data":[133,225,249],"gathered":[135],"approximately":[137],"7,000":[138],"customer":[139],"support":[140,179],"tickets":[141,180],"METCLOUD,":[143],"UK-based":[145],"computing":[149],"services":[150],"company.":[151],"These":[152],"tickets,":[153],"logged":[154],"via":[155],"phone":[156],"email,":[158],"captured":[159],"authentic":[160],"client":[161],"interactions.":[162],"A":[163],"team":[164],"experts":[167],"specialists":[170],"then":[171],"manually":[172],"pre-processed":[173],"raw":[175],"data,":[176],"transforming":[177],"unstructured":[178],"into":[181,312],"standardised":[182],"pairs.":[184],"involved":[186],"rigorous":[188],"cleaning":[189],"process":[190],"remove":[192],"irrelevant":[193],"information,":[194],"correct":[195],"errors,":[196],"standardise":[198],"terms.To":[200],"enrich":[201],"dataset,":[203],"domain-specific":[204,328],"knowledge":[205],"extracted":[207],"authoritative":[209],"online":[210],"sources,":[211],"including":[212],"documentation":[213],"Microsoft,":[215],"Hewlett-Packard":[216],"Enterprise":[217],"(HPE),":[218],"MITRE":[219],"ATT&amp;CK,":[220],"Cisco.":[222],"Furthermore,":[223],"synthetic":[224,248],"generated":[227],"using":[228],"state-of-the-art":[229],"OpenAI's":[232],"GPT-4o,":[233],"guided":[234],"by":[235,276],"systematically":[237],"created":[238],"topic":[239],"tree":[240],"ensure":[242],"comprehensive":[243],"coverage":[244],"subtopics.":[246],"generation":[250],"crucial":[252],"expanding":[254],"while":[257],"maintaining":[258],"relevance":[259],"mitigating":[261],"privacy":[262],"concerns.Before":[263],"its":[264],"final":[265],"compilation,":[266],"entire":[268],"underwent":[270],"thorough":[272],"quality":[273],"assurance":[274],"review":[275],"engineers":[278],"specialists.":[281],"expert-led":[283],"validation":[284],"ensures":[285],"accuracy":[288],"alignment":[290],"answers":[293],"current":[295],"industry":[296],"best":[297],"practices.The":[298],"provided":[301],"user-friendly":[304],"format,":[305],"making":[306],"readily":[308],"available":[309],"integration":[311],"various":[313],"model":[314],"pipelines.":[316],"public":[318],"release":[319],"aims":[320],"foster":[322],"advancements":[323],"development":[326],"applications.\"":[330]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
