{"id":"https://openalex.org/W7154393045","doi":"https://doi.org/10.48550/arxiv.2604.11107","title":"AnomalyGen: Enhancing Log-Based Anomaly Detection with Code-Guided Data Augmentation","display_name":"AnomalyGen: Enhancing Log-Based Anomaly Detection with Code-Guided Data Augmentation","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154393045","doi":"https://doi.org/10.48550/arxiv.2604.11107"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.11107","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11107","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.11107","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133610927","display_name":"Xinyu Li","orcid":"https://orcid.org/0009-0009-1645-5580"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080873193","display_name":"Yintong Huo","orcid":"https://orcid.org/0009-0006-8798-5667"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huo, Yintong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133603495","display_name":"Chenxi Mao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mao, Chenxi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133590472","display_name":"Shiwen Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Shiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133609711","display_name":"Yuxin Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Yuxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133556928","display_name":"Yanlin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yanlin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133563013","display_name":"Zibin Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Zibin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5133610927"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.970300018787384,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.970300018787384,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.0071000000461936,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.6273000240325928},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6101999878883362},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6013000011444092},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.48820000886917114},{"id":"https://openalex.org/keywords/data-consistency","display_name":"Data consistency","score":0.4848000109195709},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4458000063896179},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4447000026702881},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3903999924659729},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.35199999809265137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7189000248908997},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.6273000240325928},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6101999878883362},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6013000011444092},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5260000228881836},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.48820000886917114},{"id":"https://openalex.org/C93361087","wikidata":"https://www.wikidata.org/wiki/Q4426698","display_name":"Data consistency","level":2,"score":0.4848000109195709},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4458000063896179},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4447000026702881},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4440999925136566},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3903999924659729},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38690000772476196},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.34130001068115234},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.32440000772476196},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.29420000314712524},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C92446256","wikidata":"https://www.wikidata.org/wiki/Q3306762","display_name":"Data validation","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C12997251","wikidata":"https://www.wikidata.org/wiki/Q567560","display_name":"Anomaly (physics)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.26669999957084656},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25209999084472656},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.11107","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11107","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.11107","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11107","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Log-based":[0],"anomaly":[1,126],"detection":[2,127],"is":[3],"fundamentally":[4],"constrained":[5],"by":[6,55,174],"training":[7,53],"data":[8,54],"sparsity.":[9],"Our":[10,183],"empirical":[11],"study":[12],"reveals":[13],"that":[14,51,161],"public":[15],"benchmark":[16],"datasets":[17,186],"cover":[18],"less":[19],"than":[20],"10%":[21],"of":[22,141],"source":[23,61],"code":[24],"log":[25,58],"templates.":[26],"Consequently,":[27],"models":[28,128,136],"frequently":[29],"misclassify":[30],"unseen":[31],"but":[32],"valid":[33,87],"execution":[34,88],"paths":[35],"as":[36],"anomalies,":[37],"leading":[38],"to":[39,84,96,156,176,190],"false":[40],"alarms.":[41],"To":[42],"address":[43],"this,":[44],"we":[45],"propose":[46],"AnomalyGen,":[47],"a":[48],"novel":[49],"framework":[50,184],"augments":[52],"synthesizing":[56],"labeled":[57],"sequences":[59,114],"from":[60,154],"code.":[62],"AnomalyGen":[63,130],"combines":[64],"log-oriented":[65],"static":[66,163],"analysis":[67,164],"with":[68,115,147],"Large":[69],"Language":[70],"Model":[71],"(LLM)":[72],"reasoning":[73,95],"in":[74],"three":[75],"stages:":[76],"(1)":[77],"building":[78],"Log-Oriented":[79],"Control":[80],"Flow":[81],"Graphs":[82],"(LCFGs)":[83],"enumerate":[85],"structurally":[86],"paths;":[89],"(2)":[90],"applying":[91],"LLM":[92],"Chain-of-Thought":[93],"(CoT)":[94],"verify":[97],"logical":[98],"consistency":[99],"and":[100,110,121,144,165,178,185],"generate":[101],"realistic":[102],"runtime":[103],"parameters":[104],"(e.g.,":[105],"block":[106],"IDs,":[107],"IP":[108],"addresses);":[109],"(3)":[111],"labeling":[112],"generated":[113],"domain":[116],"heuristics.":[117],"Evaluations":[118],"on":[119,151],"HDFS":[120,152],"Zookeeper":[122],"across":[123],"12":[124],"diverse":[125],"show":[129,160],"consistently":[131],"improves":[132],"performance.":[133],"Deep":[134],"learning":[135],"achieved":[137],"average":[138],"F1-score":[139],"gains":[140],"2.18%":[142],"(HDFS)":[143],"1.69%":[145],"(Zookeeper),":[146],"an":[148],"unsupervised":[149],"Transformer":[150],"jumping":[153],"0.818":[155],"0.970.":[157],"Ablation":[158],"results":[159],"both":[162],"LLM-based":[166],"verification":[167],"are":[168,187],"necessary:":[169],"removing":[170],"them":[171],"reduces":[172],"F1":[173],"up":[175],"8.7":[177],"10.7":[179],"percentage":[180],"points,":[181],"respectively.":[182],"publicly":[188],"available":[189],"facilitate":[191],"future":[192],"research.":[193]},"counts_by_year":[],"updated_date":"2026-04-15T06:04:33.058270","created_date":"2026-04-15T00:00:00"}
