{"id":"https://openalex.org/W7148819277","doi":"https://doi.org/10.48550/arxiv.2604.01977","title":"RuleForge: Automated Generation and Validation for Web Vulnerability Detection at Scale","display_name":"RuleForge: Automated Generation and Validation for Web Vulnerability Detection at Scale","publication_year":2026,"publication_date":"2026-04-02","ids":{"openalex":"https://openalex.org/W7148819277","doi":"https://doi.org/10.48550/arxiv.2604.01977"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.01977","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01977","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.01977","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101435114","display_name":"Ayush Garg","orcid":"https://orcid.org/0009-0006-8347-8465"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Garg, Ayush","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132838433","display_name":"Sophia Hager","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hager, Sophia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042593186","display_name":"Jacob Montiel","orcid":"https://orcid.org/0000-0003-2245-0718"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Montiel, Jacob","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132833435","display_name":"Aditya Tiwari","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tiwari, Aditya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052865677","display_name":"Michael A. Gentile","orcid":"https://orcid.org/0000-0003-4542-0845"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gentile, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132885623","display_name":"Zach Reavis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reavis, Zach","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132839490","display_name":"David Magnotti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Magnotti, David","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132900670","display_name":"Wayne Fullen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fullen, Wayne","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9287999868392944,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9287999868392944,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10734","display_name":"Information and Cyber Security","score":0.02410000003874302,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.014700000174343586,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6850000023841858},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.592199981212616},{"id":"https://openalex.org/keywords/vulnerability","display_name":"Vulnerability (computing)","score":0.5703999996185303},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.47290000319480896},{"id":"https://openalex.org/keywords/data-validation","display_name":"Data validation","score":0.46459999680519104},{"id":"https://openalex.org/keywords/flagging","display_name":"Flagging","score":0.4645000100135803},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.46070000529289246},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.42559999227523804},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4146000146865845},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.3644999861717224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7868000268936157},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6850000023841858},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.592199981212616},{"id":"https://openalex.org/C95713431","wikidata":"https://www.wikidata.org/wiki/Q631425","display_name":"Vulnerability (computing)","level":2,"score":0.5703999996185303},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.53329998254776},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.47290000319480896},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.46459999680519104},{"id":"https://openalex.org/C2777548347","wikidata":"https://www.wikidata.org/wiki/Q5456937","display_name":"Flagging","level":2,"score":0.4645000100135803},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.46070000529289246},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4146000146865845},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.3644999861717224},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3458999991416931},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.34360000491142273},{"id":"https://openalex.org/C112789634","wikidata":"https://www.wikidata.org/wiki/Q18207010","display_name":"False positives and false negatives","level":3,"score":0.33739998936653137},{"id":"https://openalex.org/C39920170","wikidata":"https://www.wikidata.org/wiki/Q693083","display_name":"Soundness","level":2,"score":0.33629998564720154},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.320499986410141},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3158000111579895},{"id":"https://openalex.org/C35525427","wikidata":"https://www.wikidata.org/wiki/Q745881","display_name":"Intrusion detection system","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31060001254081726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31029999256134033},{"id":"https://openalex.org/C51110983","wikidata":"https://www.wikidata.org/wiki/Q16503490","display_name":"Overconfidence effect","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.2969000041484833},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2912999987602234},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C111065885","wikidata":"https://www.wikidata.org/wiki/Q1189053","display_name":"Fuzz testing","level":3,"score":0.275299996137619},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2696000039577484},{"id":"https://openalex.org/C106436119","wikidata":"https://www.wikidata.org/wiki/Q836575","display_name":"Quality assurance","level":3,"score":0.26339998841285706},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C80958533","wikidata":"https://www.wikidata.org/wiki/Q1047174","display_name":"Audit trail","level":3,"score":0.25600001215934753},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.25459998846054077},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.25429999828338623},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.2542000114917755}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.01977","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01977","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.01977","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01977","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.5376861095428467,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Security":[0],"teams":[1],"face":[2],"a":[3,186],"challenge:":[4],"the":[5,17,26,36,77,209],"volume":[6],"of":[7,136,211,221],"newly":[8],"disclosed":[9],"Common":[10],"Vulnerabilities":[11],"and":[12,91,113,130,138,184,208,218],"Exposures":[13],"(CVEs)":[14],"far":[15],"exceeds":[16],"capacity":[18],"to":[19,145,159,202],"manually":[20],"develop":[21],"detection":[22,50],"mechanisms.":[23],"In":[24],"2025,":[25],"National":[27],"Vulnerability":[28],"Database":[29],"published":[30],"over":[31],"48,000":[32],"new":[33],"vulnerabilities,":[34],"motivating":[35],"need":[37],"for":[38,80,94,190,199],"automation.":[39],"We":[40,173],"present":[41,175],"RuleForge,":[42],"an":[43],"AWS":[44],"internal":[45],"system":[46,112],"that":[47,53,74],"automatically":[48],"generates":[49],"rules--JSON-based":[51],"patterns":[52],"identify":[54],"malicious":[55],"HTTP":[56],"requests":[57],"exploiting":[58],"specific":[59],"vulnerabilities--from":[60],"structured":[61,78],"Nuclei":[62,67],"templates":[63,68],"describing":[64],"CVE":[65],"details.":[66],"provide":[69],"standardized,":[70],"YAML-based":[71],"vulnerability":[72],"descriptions":[73],"serve":[75],"as":[76,108],"input":[79],"our":[81,102],"rule":[82,178],"generation":[83,152,179],"process.":[84],"This":[85,118],"paper":[86],"focuses":[87],"on":[88,101],"RuleForge's":[89],"architecture":[90],"operational":[92],"deployment":[93],"CVE-related":[95],"threat":[96],"detection,":[97],"with":[98,157,165],"particular":[99],"emphasis":[100],"novel":[103],"LLM-as-a-judge":[104],"(Large":[105],"Language":[106],"Model":[107],"judge)":[109],"confidence":[110],"validation":[111,119,147],"systematic":[114,170],"feedback":[115,167],"integration":[116],"mechanism.":[117],"approach":[120],"evaluates":[121],"candidate":[122],"rules":[123,223],"across":[124],"two":[125],"dimensions--sensitivity":[126],"(avoiding":[127,132],"false":[128,133,140],"negatives)":[129],"specificity":[131],"positives)--achieving":[134],"AUROC":[135],"0.75":[137],"reducing":[139],"positives":[141],"by":[142],"67%":[143],"compared":[144],"synthetic-test-only":[146],"in":[148,214],"production.":[149],"Our":[150,193],"5x5":[151],"strategy":[153],"(five":[154],"parallel":[155],"candidates":[156],"up":[158],"five":[160],"refinement":[161],"attempts":[162],"each)":[163],"combined":[164],"continuous":[166],"loops":[168],"enables":[169],"quality":[171,219],"improvement.":[172],"also":[174],"extensions":[176],"enabling":[177],"from":[180],"unstructured":[181],"data":[182],"sources":[183],"demonstrate":[185],"proof-of-concept":[187],"agentic":[188],"workflow":[189],"multi-event-type":[191],"detection.":[192],"lessons":[194],"learned":[195],"highlight":[196],"critical":[197],"considerations":[198],"applying":[200],"LLMs":[201],"cybersecurity":[203],"tasks,":[204],"including":[205],"overconfidence":[206],"mitigation":[207],"importance":[210],"domain":[212],"expertise":[213],"both":[215],"prompt":[216],"design":[217],"review":[220],"generated":[222],"through":[224],"human-in-the-loop":[225],"validation.":[226]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-04T00:00:00"}
