{"id":"https://openalex.org/W7160928745","doi":"https://doi.org/10.48550/arxiv.2605.09730","title":"RubricRefine: Improving Tool-Use Agent Reliability with Training-Free Pre-Execution Refinement","display_name":"RubricRefine: Improving Tool-Use Agent Reliability with Training-Free Pre-Execution Refinement","publication_year":2026,"publication_date":"2026-05-10","ids":{"openalex":"https://openalex.org/W7160928745","doi":"https://doi.org/10.48550/arxiv.2605.09730"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.09730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.09730","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074030713","display_name":"Will LeVine","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"LeVine, Will","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135988021","display_name":"Brendan Evers","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Evers, Brendan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135934700","display_name":"Sam Saltwick","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saltwick, Sam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5018260641","display_name":"Abhay Venkatesh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Venkatesh, Abhay","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.3792000114917755,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.3792000114917755,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.15410000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.049800001084804535,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.6625999808311462},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.47350001335144043},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4699000120162964},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.44519999623298645},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.42820000648498535},{"id":"https://openalex.org/keywords/argument","display_name":"Argument (complex analysis)","score":0.3953999876976013},{"id":"https://openalex.org/keywords/raising","display_name":"Raising (metalworking)","score":0.3619000017642975},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.35120001435279846}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7246999740600586},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.6625999808311462},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.4821999967098236},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.47350001335144043},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4699000120162964},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.42820000648498535},{"id":"https://openalex.org/C98184364","wikidata":"https://www.wikidata.org/wiki/Q1780131","display_name":"Argument (complex analysis)","level":2,"score":0.3953999876976013},{"id":"https://openalex.org/C2780589192","wikidata":"https://www.wikidata.org/wiki/Q7285140","display_name":"Raising (metalworking)","level":2,"score":0.3619000017642975},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.35120001435279846},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.34060001373291016},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3172999918460846},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29010000824928284},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.26980000734329224},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2549999952316284},{"id":"https://openalex.org/C143587482","wikidata":"https://www.wikidata.org/wiki/Q1543216","display_name":"Iterative and incremental development","level":2,"score":0.25099998712539673},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.09730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.09730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.6613535284996033,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Iterative":[0],"self-refinement":[1],"is":[2],"a":[3,74],"popular":[4],"inference-time":[5,119],"reliability":[6],"technique,":[7],"but":[8],"its":[9],"effectiveness":[10],"in":[11],"code-mode":[12],"tool":[13,55],"use":[14],"depends":[15],"heavily":[16],"on":[17,110,130,140],"the":[18,21,131,137,155],"structure":[19],"of":[20],"feedback":[22,36,69],"signal:":[23],"unstructured":[24],"critique":[25],"helps":[26],"inconsistently":[27],"across":[28,107],"models,":[29,109],"and":[30,85,95,147,153],"even":[31],"revision":[32],"with":[33,112,121,136],"real":[34],"execution":[35,101,114],"improves":[37],"only":[38],"modestly":[39],"($0.75$":[40],"vs.":[41],"$0.65$":[42],"baseline).":[43],"The":[44],"dominant":[45],"failures":[46,98],"are":[47],"inter-tool":[48,141],"contract":[49,80,93,142],"violations":[50],"(wrong":[51],"output":[52],"shape,":[53],"incorrect":[54],"routing,":[56],"broken":[57],"argument":[58],"provenance)":[59],"that":[60,82],"run":[61],"to":[62,123],"completion":[63],"without":[64],"raising":[65],"errors,":[66],"making":[67],"runtime":[68],"insufficient.":[70],"We":[71],"introduce":[72],"RubricRefine,":[73],"training-free":[75],"method":[76,156],"for":[77],"pre-execution":[78],"semantic":[79],"verification":[81],"generates":[83],"task-":[84],"registry-specific":[86],"rubrics,":[87],"scores":[88],"candidate":[89],"code":[90],"against":[91],"explicit":[92],"checks,":[94],"iteratively":[96],"repairs":[97],"before":[99],"any":[100],"occurs.":[102],"RubricRefine":[103],"reaches":[104],"$0.86$,":[105],"averaged":[106],"seven":[108],"M3ToolEval":[111],"zero":[113],"attempts,":[115],"improving":[116],"over":[117],"prior":[118],"baselines":[120],"up":[122],"$2.6\\times$":[124],"lower":[125],"latency.":[126],"Performance":[127],"remains":[128],"flat":[129],"predominantly":[132],"single-step":[133],"API-Bank,":[134],"consistent":[135],"method's":[138],"reliance":[139],"structure.":[143],"A":[144],"rubric-category":[145],"ablation":[146],"calibration":[148],"analysis":[149],"further":[150],"characterize":[151],"when":[152],"why":[154],"works.":[157]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
