{"id":"https://openalex.org/W7162133976","doi":"https://doi.org/10.48550/arxiv.2605.22219","title":"SGR-Bench: Benchmarking Search Agents on State-Gated Retrieval","display_name":"SGR-Bench: Benchmarking Search Agents on State-Gated Retrieval","publication_year":2026,"publication_date":"2026-05-21","ids":{"openalex":"https://openalex.org/W7162133976","doi":"https://doi.org/10.48550/arxiv.2605.22219"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.22219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.22219","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019916870","display_name":"Ningyuan Li","orcid":"https://orcid.org/0000-0002-1837-8840"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ningyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136809689","display_name":"Haiyang Shen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shen, Haiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136744561","display_name":"Mugeng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Mugeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136795148","display_name":"Yudong Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Yudong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136804354","display_name":"Zhuofan Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Zhuofan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079578592","display_name":"Sixiong Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Sixiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136729528","display_name":"Yun Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.560699999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.560699999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.13179999589920044,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.06159999966621399,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6640999913215637},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.621399998664856},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5921000242233276},{"id":"https://openalex.org/keywords/data-retrieval","display_name":"Data retrieval","score":0.47749999165534973},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.424699991941452},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.3752000033855438},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.36910000443458557},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.36880001425743103}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8126000165939331},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6938999891281128},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6640999913215637},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.621399998664856},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5921000242233276},{"id":"https://openalex.org/C551230270","wikidata":"https://www.wikidata.org/wiki/Q4368942","display_name":"Data retrieval","level":2,"score":0.47749999165534973},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.424699991941452},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.3752000033855438},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.36880001425743103},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.3497999906539917},{"id":"https://openalex.org/C90288658","wikidata":"https://www.wikidata.org/wiki/Q3318149","display_name":"Human\u2013computer information retrieval","level":3,"score":0.34369999170303345},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3172999918460846},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31439998745918274},{"id":"https://openalex.org/C116425068","wikidata":"https://www.wikidata.org/wiki/Q4686695","display_name":"Adversarial information retrieval","level":5,"score":0.3086000084877014},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30809998512268066},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C21025794","wikidata":"https://www.wikidata.org/wiki/Q5141219","display_name":"Cognitive models of information retrieval","level":4,"score":0.2743000090122223},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C44083865","wikidata":"https://www.wikidata.org/wiki/Q3853443","display_name":"Mean reciprocal rank","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.22219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.22219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.22219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/15","display_name":"Life in Land","score":0.5564202070236206}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2],"large":[3],"language":[4],"models":[5],"and":[6,73,85,99,112,125,174,189],"tool-using":[7],"agents":[8,157],"have":[9],"expanded":[10],"the":[11,39,82,103,132,166],"range":[12],"of":[13,21,102,149],"benchmarked":[14],"web":[15,162],"tasks.":[16],"Yet":[17],"an":[18],"important":[19],"class":[20],"specialized":[22,29],"retrieval":[23,42,55,89,169],"tasks":[24,68],"remains":[25,143],"undercharacterized.":[26],"On":[27,130],"many":[28],"data-retrieval":[30],"websites,":[31],"answer-bearing":[32],"evidence":[33],"becomes":[34],"accessible":[35],"only":[36,136,185],"after":[37],"establishing":[38],"correct":[40],"site-specific":[41,88,168],"state":[43,90],"through":[44],"filters,":[45],"views,":[46],"hierarchies,":[47],"or":[48],"scopes.":[49],"We":[50,57,118],"term":[51],"this":[52,63],"capability":[53],"state-gated":[54,116],"(SGR).":[56],"introduce":[58],"SGR-Bench,":[59,131],"a":[60,93,160],"benchmark":[61],"for":[62,115,184],"setting":[64],"containing":[65],"100":[66],"expert-curated":[67],"spanning":[69],"six":[70],"source":[71],"families":[72],"12":[74],"public":[75],"data":[76],"ecosystems.":[77],"Each":[78],"task":[79],"requires":[80],"discovering":[81],"appropriate":[83],"website":[84],"configuring":[86],"its":[87],"to":[91],"produce":[92],"structured":[94],"answer.":[95],"SGR-Bench":[96],"pairs":[97],"constraint-guided":[98],"goal-oriented":[100],"formulations":[101],"same":[104],"underlying":[105],"problems,":[106],"enabling":[107],"controlled":[108],"comparisons":[109],"between":[110],"explicit":[111],"implicit":[113],"guidance":[114],"retrieval.":[117],"evaluate":[119],"eight":[120],"CLI-based":[121],"agentic":[122],"LLM":[123],"systems":[124],"three":[126],"commercial":[127],"search-agent":[128],"products.":[129],"strongest":[133],"system":[134],"reaches":[135],"66.18%":[137],"item-level":[138],"F1,":[139],"while":[140],"row-level":[141],"F1":[142],"much":[144],"lower.":[145],"A":[146],"manual":[147],"audit":[148],"156":[150],"analyzable":[151],"failed":[152],"CLI":[153],"trajectories":[154],"shows":[155],"why:":[156],"often":[158],"reach":[159],"relevant":[161],"source,":[163],"but":[164],"establish":[165],"wrong":[167],"state.":[170],"Retrieval-scope":[171],"drift":[172],"(37.2%)":[173],"criterion":[175],"mismatch":[176],"(27.6%)":[177],"dominate,":[178],"whereas":[179],"final":[180],"answer":[181],"composition":[182],"accounts":[183],"10.3%.":[186],"The":[187],"dataset":[188],"single-case":[190],"evaluation":[191],"instructions":[192],"are":[193],"available":[194],"at":[195],"https://huggingface.co/datasets/PKUAIWeb/SGR-BENCH.":[196]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
