{"id":"https://openalex.org/W7138230811","doi":"https://doi.org/10.48550/arxiv.2603.15594","title":"OpenSeeker: Democratizing Frontier Search Agents by Fully Open-Sourcing Training Data","display_name":"OpenSeeker: Democratizing Frontier Search Agents by Fully Open-Sourcing Training Data","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138230811","doi":"https://doi.org/10.48550/arxiv.2603.15594"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15594","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129667059","display_name":"Yuwen Du","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Du, Yuwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129673428","display_name":"Rui Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129662494","display_name":"Shuo Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Shuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129662334","display_name":"Xinyu Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Xinyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129717188","display_name":"Yijun Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Yijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129734986","display_name":"Yuzhu Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Yuzhu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129696092","display_name":"Siheng Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Siheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129667059"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.27160000801086426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.27160000801086426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.13979999721050262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.08399999886751175,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.5939000248908997},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.57669997215271},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.5022000074386597},{"id":"https://openalex.org/keywords/scarcity","display_name":"Scarcity","score":0.4530999958515167},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4083000123500824},{"id":"https://openalex.org/keywords/data-driven","display_name":"Data-driven","score":0.365200012922287},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.34290000796318054},{"id":"https://openalex.org/keywords/competitor-analysis","display_name":"Competitor analysis","score":0.33959999680519104},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.3314000070095062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7060999870300293},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.5939000248908997},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.57669997215271},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.5022000074386597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49410000443458557},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.4530999958515167},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4097000062465668},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4083000123500824},{"id":"https://openalex.org/C2780440489","wikidata":"https://www.wikidata.org/wiki/Q5227278","display_name":"Data-driven","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.34290000796318054},{"id":"https://openalex.org/C127576917","wikidata":"https://www.wikidata.org/wiki/Q624630","display_name":"Competitor analysis","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.319599986076355},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C94822996","wikidata":"https://www.wikidata.org/wiki/Q1777902","display_name":"Satisficing","level":2,"score":0.28929999470710754},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.2558000087738037},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15594","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15594","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"search":[1,20,68,217],"capabilities":[2],"have":[3],"become":[4],"an":[5],"indispensable":[6],"competency":[7],"for":[8],"frontier":[9,216],"Large":[10],"Language":[11],"Model":[12],"(LLM)":[13],"agents,":[14],"yet":[15],"the":[16,43,46,64,91,123,127,169,206,211],"development":[17],"of":[18,31,45],"high-performance":[19],"agents":[21],"remains":[22],"dominated":[23],"by":[24],"industrial":[25,184],"giants":[26],"due":[27],"to":[28,100,121,130,214],"a":[29,117,222],"lack":[30],"transparent,":[32,224],"high-quality":[33,132],"training":[34,142,208],"data.":[35],"This":[36],"persistent":[37],"data":[38],"scarcity":[39],"has":[40],"fundamentally":[41],"hindered":[42],"progress":[44],"broader":[47],"research":[48,219],"community":[49],"in":[50],"developing":[51],"and":[52,72,97,109,159,181,196,210,220],"innovating":[53],"within":[54],"this":[55,59],"domain.":[56],"To":[57],"bridge":[58],"gap,":[60],"we":[61],"introduce":[62],"OpenSeeker,":[63,138],"first":[65],"fully":[66,171,204],"open-source":[67,172,205],"agent":[69,173,218],"(i.e.,":[70],"model":[71,212],"data)":[73],"that":[74,137],"achieves":[75,149],"frontier-level":[76],"performance":[77,151],"through":[78],"two":[79],"core":[80],"technical":[81],"innovations:":[82],"(1)":[83],"Fact-grounded":[84],"scalable":[85],"controllable":[86,107],"QA":[87],"synthesis,":[88,114],"which":[89,115],"reverse-engineers":[90],"web":[92],"graph":[93],"via":[94,191],"topological":[95],"expansion":[96],"entity":[98],"obfuscation":[99],"generate":[101,131],"complex,":[102],"multi-hop":[103],"reasoning":[104],"tasks":[105],"with":[106,163],"coverage":[108],"complexity.":[110],"(2)":[111],"Denoised":[112],"trajectory":[113],"employs":[116],"retrospective":[118],"summarization":[119],"mechanism":[120],"denoise":[122],"trajectory,":[124],"therefore":[125],"promoting":[126],"teacher":[128],"LLMs":[129],"actions.":[133],"Experimental":[134],"results":[135],"demonstrate":[136],"trained":[139,162],"(a":[140],"single":[141],"run)":[143],"on":[144,179,198],"only":[145],"11.7k":[146],"synthesized":[147],"samples,":[148],"state-of-the-art":[150],"across":[152],"multiple":[153],"benchmarks":[154],"including":[155],"BrowseComp,":[156],"BrowseComp-ZH,":[157],"xbench-DeepSearch,":[158],"WideSearch.":[160],"Notably,":[161],"simple":[164],"SFT,":[165,195],"OpenSeeker":[166],"significantly":[167],"outperforms":[168],"second-best":[170],"DeepDive":[174],"(e.g.,":[175],"29.5%":[176],"v.s.":[177,201],"15.3%":[178],"BrowseComp),":[180],"even":[182],"surpasses":[183],"competitors":[185],"such":[186],"as":[187],"Tongyi":[188],"DeepResearch":[189],"(trained":[190],"extensive":[192],"continual":[193],"pre-training,":[194],"RL)":[197],"BrowseComp-ZH":[199],"(48.4%":[200],"46.7%).":[202],"We":[203],"complete":[207],"dataset":[209],"weights":[213],"democratize":[215],"foster":[221],"more":[223],"collaborative":[225],"ecosystem.":[226]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
