{"id":"https://openalex.org/W7152007463","doi":"https://doi.org/10.48550/arxiv.2604.05719","title":"Hackers or Hallucinators? A Comprehensive Analysis of LLM-Based Automated Penetration Testing","display_name":"Hackers or Hallucinators? A Comprehensive Analysis of LLM-Based Automated Penetration Testing","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152007463","doi":"https://doi.org/10.48550/arxiv.2604.05719"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05719","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05719","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05719","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111132451","display_name":"Jiaren Peng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Peng, Jiaren","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104330729","display_name":"Zeqin Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zeqin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133157807","display_name":"Chang You","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"You, Chang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103763966","display_name":"Yan Wang","orcid":"https://orcid.org/0009-0002-6149-0149"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026052021","display_name":"Hanlin Sun","orcid":"https://orcid.org/0000-0003-2961-4834"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Hanlin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101429262","display_name":"Xuan Tian","orcid":"https://orcid.org/0000-0003-1966-2445"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Xuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133224920","display_name":"Shuqiao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shuqiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133171456","display_name":"Junyi Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Junyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112391220","display_name":"Jianguo Zhao","orcid":"https://orcid.org/0000-0002-4701-1570"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Jianguo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028872220","display_name":"Renyang Liu","orcid":"https://orcid.org/0000-0002-7121-1257"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Renyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133152211","display_name":"Haoran Ou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ou, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133196460","display_name":"Yuqiang Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yuqiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133163453","display_name":"Jiancheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiancheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012907450","display_name":"Yutong Jiao","orcid":"https://orcid.org/0000-0003-1181-0589"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Yutong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133169909","display_name":"Kunshu Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Kunshu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133203864","display_name":"Chao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007910454","display_name":"Fan Shi","orcid":"https://orcid.org/0000-0003-2074-0228"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Fan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119853980","display_name":"Hongda Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Hongda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133172586","display_name":"Rui Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133220320","display_name":"Cheng Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Cheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":20,"corresponding_author_ids":["https://openalex.org/A5111132451"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.23770000040531158,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.23770000040531158,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.1720999926328659,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.15680000185966492,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hacker","display_name":"Hacker","score":0.53329998254776},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.47999998927116394},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.3946000039577484},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.36489999294281006}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6583999991416931},{"id":"https://openalex.org/C86844869","wikidata":"https://www.wikidata.org/wiki/Q2798820","display_name":"Hacker","level":2,"score":0.53329998254776},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.47999998927116394},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4731999933719635},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.42089998722076416},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.3946000039577484},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.36489999294281006},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.329800009727478},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2879999876022339},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2567000091075897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05719","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05719","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05719","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05719","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.4201023578643799}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,117],"rapid":[1],"advancement":[2],"of":[3,30,55,67,145],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"has":[8],"created":[9],"new":[10],"opportunities":[11],"for":[12,186],"Automated":[13],"Penetration":[14],"Testing":[15],"(AutoPT),":[16],"spawning":[17],"numerous":[18],"frameworks":[19,108,112,176],"aimed":[20],"at":[21],"achieving":[22],"end-to-end":[23],"autonomous":[24],"attacks.":[25],"However,":[26],"despite":[27],"the":[28,52,60,156],"proliferation":[29],"related":[31],"studies,":[32],"existing":[33,78,173],"research":[34],"generally":[35],"lacks":[36],"systematic":[37],"architectural":[38,61],"analysis":[39],"and":[40,63,94,109,126,137,177],"large-scale":[41,101,179],"empirical":[42,65,97,180],"comparisons":[43],"under":[44],"a":[45,114,143,168,178],"unified":[46,115],"benchmark.":[47,116],"Therefore,":[48],"this":[49,160],"paper":[50],"presents":[51],"first":[53],"Systematization":[54],"Knowledge":[56],"(SoK)":[57],"focusing":[58],"on":[59,103],"design":[62],"comprehensive":[64],"evaluation":[66],"current":[68],"LLM-based":[69,174],"AutoPT":[70,107,175],"frameworks.":[71],"At":[72,96],"systematization":[73],"level,":[74,98],"we":[75,99,164],"comprehensively":[76],"review":[77],"framework":[79],"designs":[80],"across":[81],"six":[82],"dimensions:":[83],"agent":[84,86,88,90],"architecture,":[85],"plan,":[87],"memory,":[89],"execution,":[91],"external":[92],"knowledge,":[93],"benchmarks.":[95],"conduct":[100],"experiments":[102,118],"13":[104],"representative":[105],"open-source":[106],"2":[110],"baseline":[111],"utilizing":[113],"consumed":[119],"over":[120,139],"10":[121],"billion":[122],"tokens":[123],"in":[124,152,159],"total":[125],"generated":[127],"more":[128,146],"than":[129,147],"1,500":[130],"execution":[131],"logs,":[132],"which":[133],"were":[134],"manually":[135],"reviewed":[136],"analyzed":[138],"four":[140],"months":[141],"by":[142],"panel":[144],"15":[148],"researchers":[149,166],"with":[150,167,183],"expertise":[151],"cybersecurity.":[153],"By":[154],"investigating":[155],"latest":[157],"progress":[158],"rapidly":[161],"developing":[162],"field,":[163],"provide":[165],"structured":[169],"taxonomy":[170],"to":[171],"understand":[172],"benchmark,":[181],"along":[182],"promising":[184],"directions":[185],"future":[187],"research.":[188]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-09T00:00:00"}
