{"id":"https://openalex.org/W7133607175","doi":"https://doi.org/10.48550/arxiv.2603.03538","title":"Online Learnability of Chain-of-Thought Verifiers: Soundness and Completeness Trade-offs","display_name":"Online Learnability of Chain-of-Thought Verifiers: Soundness and Completeness Trade-offs","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133607175","doi":"https://doi.org/10.48550/arxiv.2603.03538"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.03538","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128162455","display_name":"Maria-Florina Balcan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Balcan, Maria-Florina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094348498","display_name":"Avrim Blum","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Blum, Avrim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128209107","display_name":"Kiriaki Fragkia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fragkia, Kiriaki","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128208316","display_name":"Zhiyuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128184032","display_name":"Dravyansh Sharma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sharma, Dravyansh","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5128162455"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.20100000500679016,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.20100000500679016,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.11729999631643295,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.08659999817609787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/soundness","display_name":"Soundness","score":0.930400013923645},{"id":"https://openalex.org/keywords/completeness","display_name":"Completeness (order theory)","score":0.7906000018119812},{"id":"https://openalex.org/keywords/gas-meter-prover","display_name":"Gas meter prover","score":0.7354000210762024},{"id":"https://openalex.org/keywords/mathematical-proof","display_name":"Mathematical proof","score":0.7197999954223633},{"id":"https://openalex.org/keywords/learnability","display_name":"Learnability","score":0.6603000164031982},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6406000256538391},{"id":"https://openalex.org/keywords/mistake","display_name":"Mistake","score":0.5088000297546387}],"concepts":[{"id":"https://openalex.org/C39920170","wikidata":"https://www.wikidata.org/wiki/Q693083","display_name":"Soundness","level":2,"score":0.930400013923645},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.7906000018119812},{"id":"https://openalex.org/C159718280","wikidata":"https://www.wikidata.org/wiki/Q5526353","display_name":"Gas meter prover","level":3,"score":0.7354000210762024},{"id":"https://openalex.org/C108710211","wikidata":"https://www.wikidata.org/wiki/Q11538","display_name":"Mathematical proof","level":2,"score":0.7197999954223633},{"id":"https://openalex.org/C2777723229","wikidata":"https://www.wikidata.org/wiki/Q4367921","display_name":"Learnability","level":2,"score":0.6603000164031982},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6406000256538391},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5958999991416931},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.5098999738693237},{"id":"https://openalex.org/C2777179996","wikidata":"https://www.wikidata.org/wiki/Q911222","display_name":"Mistake","level":2,"score":0.5088000297546387},{"id":"https://openalex.org/C162838799","wikidata":"https://www.wikidata.org/wiki/Q596077","display_name":"Counterexample","level":2,"score":0.43779999017715454},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41850000619888306},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.34049999713897705},{"id":"https://openalex.org/C119322782","wikidata":"https://www.wikidata.org/wiki/Q2662236","display_name":"VC dimension","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31360000371932983},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3061000108718872},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C176248197","wikidata":"https://www.wikidata.org/wiki/Q458526","display_name":"Probably approximately correct learning","level":4,"score":0.2685999870300293}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.03538","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.03538","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.03538","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.03538","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"with":[4,52,245,257],"chain-of-thought":[5,102],"generation":[6,219],"have":[7],"demonstrated":[8],"great":[9],"potential":[10],"for":[11,100,158,170,189],"solving":[12],"complex":[13],"reasoning":[14,112,133,140,242],"and":[15,28,49,83,108,135,217,260],"planning":[16],"tasks.":[17],"However,":[18],"the":[19,70,78,115,118,121,149,155,163,172,209,230,236,240],"output":[20,65],"of":[21,111,117,124,148,178,183,194,211,214,220,235],"current":[22],"LLMs":[23,34],"is":[24,76],"not":[25],"fully":[26],"reliable":[27],"needs":[29],"careful":[30],"verification.":[31],"Even":[32],"if":[33],"get":[35],"more":[36],"accurate":[37],"over":[38],"time,":[39],"learned":[40,202],"verifiers":[41,103,203],"can":[42,204,238],"help":[43],"increase":[44],"trust,":[45],"enforce":[46],"safety":[47],"constraints,":[48],"ensure":[50],"alignment":[51],"personal":[53],"preferences.":[54],"A":[55],"major":[56],"challenge":[57],"in":[58,128,131,162],"learning":[59,98,101,159],"verifiers,":[60],"however,":[61],"especially":[62],"when":[63],"their":[64],"will":[66],"be":[67,205],"used":[68,206],"by":[69,91],"generator":[71,82,256],"to":[72,207,252],"improve":[73],"its":[74],"reasoning,":[75],"that":[77,233],"feedback":[79],"loop":[80],"between":[81],"verifier":[84,161],"may":[85],"produce":[86],"substantial":[87],"distribution":[88],"shift.":[89],"Motivated":[90],"this":[92],"challenge,":[93],"we":[94,144,249],"propose":[95],"an":[96],"online":[97],"framework":[99],"that,":[104],"given":[105,180],"a":[106,109,132,160,181,191,212,254],"problem":[107],"sequence":[110],"steps,":[113],"check":[114],"correctness":[116],"solution.":[119],"Highlighting":[120],"asymmetric":[122,195],"role":[123],"soundness":[125,184],"errors":[126,130,137],"(failure":[127],"catching":[129],"trace)":[134],"completeness":[136],"(flagging":[138],"correct":[139],"steps":[141],"as":[142,186,188],"wrong),":[143],"introduce":[145],"novel":[146],"extensions":[147],"Littlestone":[150],"dimension":[151],"which":[152],"tightly":[153],"characterize":[154],"mistake":[156],"bounds":[157],"realizable":[164],"setting.":[165],"We":[166,197],"provide":[167],"optimal":[168],"algorithms":[169],"finding":[171],"Pareto-frontier":[173],"(the":[174],"smallest":[175],"total":[176],"number":[177],"mistakes":[179],"budget":[182],"mistakes)":[185],"well":[187],"minimizing":[190],"linear":[192],"combination":[193],"costs.":[196],"further":[198],"show":[199,250],"how":[200,251],"our":[201],"boost":[208],"accuracy":[210],"collection":[213],"weak":[215],"generators,":[216],"enable":[218],"proofs":[221],"beyond":[222],"what":[223],"they":[224],"were":[225],"initially":[226],"trained":[227],"on.":[228],"With":[229],"mild":[231],"assumption":[232],"one":[234],"generators":[237],"generate":[239],"next":[241],"step":[243],"correctly":[244],"some":[246],"minimal":[247],"probability,":[248],"learn":[253],"strong":[255],"small":[258],"error":[259],"abstention":[261],"rates.":[262]},"counts_by_year":[],"updated_date":"2026-04-09T06:08:40.794217","created_date":"2026-03-06T00:00:00"}
