{"id":"https://openalex.org/W7162296299","doi":"https://doi.org/10.48550/arxiv.2605.23590","title":"Co-ReAct: Rubrics as Step-Level Collaborators for ReAct Agents","display_name":"Co-ReAct: Rubrics as Step-Level Collaborators for ReAct Agents","publication_year":2026,"publication_date":"2026-05-22","ids":{"openalex":"https://openalex.org/W7162296299","doi":"https://doi.org/10.48550/arxiv.2605.23590"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.23590","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23590","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.23590","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074053099","display_name":"Jiazheng Kang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kang, Jiazheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136902987","display_name":"Bowen Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Bowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103857768","display_name":"Zixin Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Zixin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136908125","display_name":"Jiangwang Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jiangwang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136937631","display_name":"Xiao Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136947942","display_name":"Da Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Da","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136972120","display_name":"Guanjun Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Guanjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.8134999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.8134999871253967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.050700001418590546,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.04619999974966049,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/rubric","display_name":"Rubric","score":0.975600004196167},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5702999830245972},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.476500004529953},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.4763999879360199},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.45159998536109924},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.40149998664855957},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3939000070095062}],"concepts":[{"id":"https://openalex.org/C111640148","wikidata":"https://www.wikidata.org/wiki/Q847349","display_name":"Rubric","level":2,"score":0.975600004196167},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6474000215530396},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5702999830245972},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.476500004529953},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.4763999879360199},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.45159998536109924},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.40149998664855957},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.3937999904155731},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.3718999922275543},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35199999809265137},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.30730000138282776},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C204434341","wikidata":"https://www.wikidata.org/wiki/Q357789","display_name":"Adjudication","level":2,"score":0.30300000309944153},{"id":"https://openalex.org/C3018989441","wikidata":"https://www.wikidata.org/wiki/Q691640","display_name":"Quality of evidence","level":3,"score":0.2976999878883362},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2842999994754791},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.23590","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23590","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.23590","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.23590","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6251219511032104,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"ReAct-style":[0],"agents":[1,187],"for":[2],"search-intensive,":[3],"multi-step":[4],"reasoning":[5,21],"tasks":[6],"rely":[7],"largely":[8],"on":[9,189],"their":[10,215],"own":[11],"internal":[12],"judgment":[13],"to":[14,18,25,30,110],"decide":[15],"what":[16,117],"evidence":[17,123],"seek,":[19],"which":[20],"or":[22,36,64,127,145],"action":[23],"step":[24],"take":[26],"next,":[27],"and":[28,70,78,173,180,193],"when":[29],"stop,":[31],"often":[32,76],"producing":[33],"shallow,":[34],"redundant,":[35],"poorly":[37],"targeted":[38],"trajectories.":[39],"Prior":[40],"work":[41],"has":[42],"explored":[43],"rubrics":[44,58,92,163],"as":[45,61,93,205],"external":[46],"quality":[47],"signals,":[48],"but":[49],"existing":[50],"uses":[51,91],"are":[52,75,165],"mostly":[53],"evaluative":[54],"rather":[55,80,167],"than":[56,81,168],"action-guiding:":[57],"typically":[59],"serve":[60,204],"training-time":[62],"rewards":[63],"post-hoc":[65],"evaluators":[66],"of":[67],"completed":[68],"outputs,":[69],"in":[71,122],"deep-research":[72],"settings":[73],"they":[74],"coarse-grained":[77],"report-level":[79],"step-level.":[82],"We":[83],"introduce":[84],"Co-ReAct,":[85],"a":[86,104,136,152,206],"rubric-guided":[87],"action-selection":[88],"framework":[89],"that":[90,164,209],"step-level":[94],"guidance":[95,132],"during":[96],"inference.":[97],"At":[98],"each":[99],"decision":[100,217],"step,":[101],"Co-ReAct":[102,175],"injects":[103],"rubric":[105,138,200],"into":[106],"the":[107,112,118],"agent's":[108],"context":[109],"guide":[111],"next":[113],"Reason-or-Act":[114],"decision,":[115],"specifying":[116],"agent":[119],"should":[120],"target":[121],"seeking,":[124],"search,":[125],"reasoning,":[126],"self-evaluation.":[128],"To":[129],"make":[130],"this":[131],"reliable,":[133],"we":[134],"train":[135],"dedicated":[137],"generator":[139,201],"with":[140],"GRPO.":[141],"Unlike":[142],"prior":[143],"pairwise":[144],"binary":[146],"preference":[147],"formulations,":[148],"our":[149],"objective":[150],"optimizes":[151],"list-wise":[153],"Spearman":[154],"rank-correlation":[155],"reward":[156],"against":[157],"multi-judge":[158],"expert":[159],"consensus":[160],"rankings,":[161],"encouraging":[162],"discriminative":[166],"merely":[169],"plausible.":[170],"On":[171],"DeepResearchBench":[172],"SQA-CS-V2,":[174],"consistently":[176],"improves":[177,210],"over":[178],"ReAct":[179],"representative":[181],"test-time":[182],"compute":[183],"baselines":[184,212],"across":[185],"search":[186],"built":[188],"both":[190],"8B/14B":[191],"open-source":[192],"frontier":[194],"closed-source":[195],"base":[196],"models.":[197],"The":[198],"trained":[199],"can":[202],"also":[203],"drop-in":[207],"component":[208],"these":[211],"without":[213],"changing":[214],"underlying":[216],"mechanisms.":[218],"Our":[219],"code":[220],"is":[221],"publicly":[222],"available":[223],"at":[224],"https://github.com/ZBWpro/Co-ReAct.":[225]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-26T00:00:00"}
