{"id":"https://openalex.org/W7152053000","doi":"https://doi.org/10.48550/arxiv.2604.05820","title":"Reinforcement Learning with Negative Tests as Completeness Signal for Formal Specification Synthesis","display_name":"Reinforcement Learning with Negative Tests as Completeness Signal for Formal Specification Synthesis","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152053000","doi":"https://doi.org/10.48550/arxiv.2604.05820"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.05820","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.05820","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103223639","display_name":"Zhechong Huang","orcid":"https://orcid.org/0009-0001-7894-2220"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Zhechong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089224167","display_name":"Zhao Zhang","orcid":"https://orcid.org/0000-0003-3397-3549"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133185223","display_name":"Zeyu Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Zeyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133206190","display_name":"Huifeng Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Huifeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133206646","display_name":"Yingfei Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiong, Yingfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103223639"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.4530999958515167,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10142","display_name":"Formal Methods in Verification","score":0.4530999958515167,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.09920000284910202,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.07129999995231628,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/formal-specification","display_name":"Formal specification","score":0.6254000067710876},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6205000281333923},{"id":"https://openalex.org/keywords/completeness","display_name":"Completeness (order theory)","score":0.5985999703407288},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5952000021934509},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5631999969482422},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5189999938011169},{"id":"https://openalex.org/keywords/formal-verification","display_name":"Formal verification","score":0.5138999819755554},{"id":"https://openalex.org/keywords/specification-language","display_name":"Specification language","score":0.4885999858379364},{"id":"https://openalex.org/keywords/formal-methods","display_name":"Formal methods","score":0.4584999978542328}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7552000284194946},{"id":"https://openalex.org/C116253237","wikidata":"https://www.wikidata.org/wiki/Q1437424","display_name":"Formal specification","level":2,"score":0.6254000067710876},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6205000281333923},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.5985999703407288},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5952000021934509},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5631999969482422},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5189999938011169},{"id":"https://openalex.org/C111498074","wikidata":"https://www.wikidata.org/wiki/Q173326","display_name":"Formal verification","level":2,"score":0.5138999819755554},{"id":"https://openalex.org/C201677973","wikidata":"https://www.wikidata.org/wiki/Q1209840","display_name":"Specification language","level":2,"score":0.4885999858379364},{"id":"https://openalex.org/C75606506","wikidata":"https://www.wikidata.org/wiki/Q1049183","display_name":"Formal methods","level":2,"score":0.4584999978542328},{"id":"https://openalex.org/C84651959","wikidata":"https://www.wikidata.org/wiki/Q17052506","display_name":"System requirements specification","level":2,"score":0.4514000117778778},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.44200000166893005},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.43479999899864197},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.42579999566078186},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4058000147342682},{"id":"https://openalex.org/C62460635","wikidata":"https://www.wikidata.org/wiki/Q5508853","display_name":"Functional verification","level":3,"score":0.4016000032424927},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3968000113964081},{"id":"https://openalex.org/C102780508","wikidata":"https://www.wikidata.org/wiki/Q1761598","display_name":"Software requirements specification","level":5,"score":0.3479999899864197},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33570000529289246},{"id":"https://openalex.org/C117222624","wikidata":"https://www.wikidata.org/wiki/Q7575010","display_name":"Specification","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C90069079","wikidata":"https://www.wikidata.org/wiki/Q1805432","display_name":"Language Of Temporal Ordering Specification","level":3,"score":0.26809999346733093},{"id":"https://openalex.org/C3406870","wikidata":"https://www.wikidata.org/wiki/Q6044160","display_name":"Intelligent verification","level":5,"score":0.2646999955177307},{"id":"https://openalex.org/C3309909","wikidata":"https://www.wikidata.org/wiki/Q864155","display_name":"Binary decision diagram","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C2777691520","wikidata":"https://www.wikidata.org/wiki/Q4724019","display_name":"Algebraic specification","level":3,"score":0.25949999690055847},{"id":"https://openalex.org/C146072743","wikidata":"https://www.wikidata.org/wiki/Q192161","display_name":"Formal language","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C202973057","wikidata":"https://www.wikidata.org/wiki/Q7380130","display_name":"Runtime verification","level":3,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.05820","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.05820","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.05820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"specification":[1,73,83,119,124,145],"synthesis":[2,84],"task":[3,20],"aims":[4],"to":[5,160,172],"automatically":[6],"generate":[7],"specifications,":[8],"together":[9],"with":[10,154],"any":[11],"necessary":[12],"auxiliary":[13],"verification":[14,36,54,148],"annotations,":[15],"for":[16,72,82,132],"existing":[17],"programs.":[18],"This":[19],"is":[21,50,66,68,127],"important":[22],"because":[23,47],"such":[24],"specifications":[25,61],"serve":[26],"as":[27,120],"behavioral":[28],"contracts":[29],"that":[30,92,99,141,168],"support":[31],"modular":[32],"reasoning":[33],"and":[34,147,152,164],"reusable":[35],"across":[37,136],"a":[38,69,78,89,117,121,155],"codebase.":[39],"At":[40],"the":[41,105,109,130],"same":[42],"time,":[43],"it":[44],"remains":[45,165],"challenging":[46],"verifier-only":[48],"feedback":[49],"fundamentally":[51],"incomplete:":[52],"passing":[53],"establishes":[55],"soundness,":[56],"but":[57],"cannot":[58],"distinguish":[59],"weak":[60],"from":[62],"strong":[63],"ones.":[64],"What":[65],"missing":[67],"fine-grained":[70],"signal":[71,122],"completeness.":[74],"We":[75,107],"present":[76],"SpecRL,":[77],"reinforcement":[79],"learning":[80],"framework":[81],"in":[85],"Dafny.":[86],"SpecRL":[87,142],"introduces":[88],"self-contained":[90],"pipeline":[91],"generates":[93],"negative":[94,113],"tests,":[95],"i.e.,":[96],"input-output":[97],"pairs":[98],"can":[100],"never":[101],"be":[102],"produced":[103],"by":[104,116],"program.":[106],"use":[108],"fraction":[110],"of":[111,123],"these":[112],"tests":[114],"rejected":[115],"candidate":[118],"completeness,":[125],"which":[126],"integrated":[128],"into":[129],"reward":[131],"RL":[133,153],"training.":[134],"Experiments":[135],"four":[137],"model":[138],"sizes":[139],"show":[140],"improves":[143],"both":[144],"strength":[146],"success":[149],"over":[150],"SFT":[151],"binary":[156],"specification-strength":[157],"reward,":[158],"generalizes":[159],"an":[161],"out-of-distribution":[162],"benchmark,":[163],"competitive":[166],"on":[167],"unseen":[169],"benchmark":[170],"compared":[171],"much":[173],"larger":[174],"general-purpose":[175],"LLMs.":[176]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-09T00:00:00"}
