{"id":"https://openalex.org/W7128479308","doi":"https://doi.org/10.48550/arxiv.2602.07773","title":"SRR-Judge: Step-Level Rating and Refinement for Enhancing Search-Integrated Reasoning in Search Agents","display_name":"SRR-Judge: Step-Level Rating and Refinement for Enhancing Search-Integrated Reasoning in Search Agents","publication_year":2026,"publication_date":"2026-02-08","ids":{"openalex":"https://openalex.org/W7128479308","doi":"https://doi.org/10.48550/arxiv.2602.07773"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.07773","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125489859","display_name":"chen zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125589779","display_name":"Kuicai Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Kuicai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125549232","display_name":"Dexun Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dexun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125520901","display_name":"Wenjun Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wenjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125588471","display_name":"Qu Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Qu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125592512","display_name":"Wei Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125506150","display_name":"Yong Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5125489859"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.525600016117096,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.525600016117096,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2409999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.09130000323057175,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5112000107765198},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.3917999863624573},{"id":"https://openalex.org/keywords/iterative-refinement","display_name":"Iterative refinement","score":0.38659998774528503},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.3571000099182129},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.3546999990940094},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.3411000072956085},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.32339999079704285}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7203999757766724},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6459000110626221},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5378999710083008},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5112000107765198},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3917999863624573},{"id":"https://openalex.org/C2779982483","wikidata":"https://www.wikidata.org/wiki/Q6094420","display_name":"Iterative refinement","level":2,"score":0.38659998774528503},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3499000072479248},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.32339999079704285},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C83725634","wikidata":"https://www.wikidata.org/wiki/Q7268699","display_name":"Qualitative reasoning","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.28130000829696655},{"id":"https://openalex.org/C159032336","wikidata":"https://www.wikidata.org/wiki/Q2488768","display_name":"Non-monotonic logic","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C2984391234","wikidata":"https://www.wikidata.org/wiki/Q195771","display_name":"Sequential sampling","level":3,"score":0.266400009393692}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.07773","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.07773","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.07773","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.07773","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"deep":[1,94,149],"search":[2,59,95,150],"agents":[3],"built":[4],"on":[5],"large":[6],"reasoning":[7,57,74],"models":[8,111],"(LRMs)":[9],"excel":[10],"at":[11],"complex":[12],"question":[13],"answering":[14],"by":[15],"iteratively":[16],"planning,":[17],"acting,":[18],"and":[19,45,58,75],"gathering":[20],"evidence,":[21],"a":[22,50,63,140],"capability":[23,96],"known":[24],"as":[25,113],"search-integrated":[26,73],"reasoning.":[27],"However,":[28],"mainstream":[29],"approaches":[30],"often":[31],"train":[32],"this":[33],"ability":[34],"using":[35],"only":[36],"outcome-based":[37],"supervision,":[38],"neglecting":[39],"the":[40,93,98,127],"quality":[41],"of":[42,56,97],"intermediate":[43],"thoughts":[44],"actions.":[46,60],"We":[47],"introduce":[48],"SRR-Judge,":[49],"framework":[51],"for":[52,72],"reliable":[53,105],"step-level":[54,106],"assessment":[55],"Integrated":[61],"into":[62],"modified":[64],"ReAct-style":[65],"rate-and-refine":[66],"workflow,":[67],"SRR-Judge":[68,102,130],"provides":[69],"fine-grained":[70],"guidance":[71],"enables":[76],"efficient":[77],"post-training":[78],"annotation.":[79],"Using":[80],"SRR-annotated":[81],"data,":[82],"we":[83],"apply":[84],"an":[85],"iterative":[86],"rejection":[87],"sampling":[88],"fine-tuning":[89],"procedure":[90],"to":[91,134],"enhance":[92],"base":[99],"agent.":[100],"Empirically,":[101],"delivers":[103],"more":[104],"evaluations":[107],"than":[108],"much":[109],"larger":[110],"such":[112],"DeepSeek-V3.1,":[114],"with":[115,121,129],"its":[116],"ratings":[117],"showing":[118],"strong":[119],"correlation":[120],"final":[122],"answer":[123],"correctness.":[124],"Moreover,":[125],"aligning":[126],"policy":[128],"annotated":[131],"trajectories":[132],"leads":[133],"substantial":[135],"performance":[136],"gains,":[137],"yielding":[138],"over":[139],"10":[141],"percent":[142],"average":[143],"absolute":[144],"pass@1":[145],"improvement":[146],"across":[147],"challenging":[148],"benchmarks.":[151]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-11T00:00:00"}
