{"id":"https://openalex.org/W7134832180","doi":"https://doi.org/10.48550/arxiv.2603.08117","title":"UIS-Digger: Towards Comprehensive Research Agent Systems for Real-world Unindexed Information Seeking","display_name":"UIS-Digger: Towards Comprehensive Research Agent Systems for Real-world Unindexed Information Seeking","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134832180","doi":"https://doi.org/10.48550/arxiv.2603.08117"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.08117","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.08117","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128630435","display_name":"Chang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Chang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128657006","display_name":"Chuqiao Kuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuang, Chuqiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128651399","display_name":"Tianyi Zhuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Tianyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103008691","display_name":"Yuxin Cheng","orcid":"https://orcid.org/0000-0002-6494-0101"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Yuxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128636323","display_name":"Huichi Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Huichi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128642381","display_name":"Xiaoguang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xiaoguang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5046228314","display_name":"Lifeng Shang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang, Lifeng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5128630435"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.7786999940872192,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.7786999940872192,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.06030000001192093,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.04500000178813934,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.477400004863739},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.37540000677108765},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.37450000643730164},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.35429999232292175},{"id":"https://openalex.org/keywords/information-system","display_name":"Information system","score":0.3490999937057495}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7142999768257141},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.477400004863739},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3828999996185303},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.37540000677108765},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.37450000643730164},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.3490999937057495},{"id":"https://openalex.org/C2776636917","wikidata":"https://www.wikidata.org/wiki/Q3772297","display_name":"Information seeking","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.326200008392334},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3010999858379364},{"id":"https://openalex.org/C13687954","wikidata":"https://www.wikidata.org/wiki/Q4826847","display_name":"Autonomous agent","level":2,"score":0.30059999227523804}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.08117","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.08117","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08117","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,189],"LLM-based":[3],"information-seeking":[4,212],"agents":[5,15,86],"have":[6],"achieved":[7],"record-breaking":[8],"performance":[9,90],"on":[10,19,92,97,101,105],"established":[11],"benchmarks.":[12],"However,":[13],"these":[14],"remain":[16],"heavily":[17],"reliant":[18],"search-engine-indexed":[20],"knowledge,":[21],"leaving":[22],"a":[23,88,119,136,151,186,205],"critical":[24],"blind":[25],"spot:":[26],"Unindexed":[27],"Information":[28],"Seeking":[29],"(UIS).":[30],"This":[31,166],"paper":[32],"identifies":[33],"and":[34,55,99,127,132,145,164,178,207],"explores":[35],"the":[36,73,108,111,168,197],"UIS":[37,61,76,202],"problem,":[38],"where":[39],"vital":[40],"information":[41],"is":[42],"not":[43,183],"captured":[44],"by":[45],"search":[46],"engine":[47],"crawlers,":[48],"such":[49,161],"as":[50,162],"overlooked":[51],"content,":[52],"dynamic":[53],"webpages,":[54],"embedded":[56],"files.":[57],"Despite":[58],"its":[59],"significance,":[60],"remains":[62],"an":[63],"underexplored":[64],"challenge.":[65],"To":[66,113],"address":[67],"this":[68],"gap,":[69],"we":[70,116],"introduce":[71],"UIS-QA,":[72],"first":[74,198],"dedicated":[75],"benchmark,":[77],"comprising":[78],"110":[79],"expert-annotated":[80],"QA":[81],"pairs.":[82],"Notably,":[83],"even":[84],"state-of-the-art":[85],"experience":[87],"drastic":[89],"drop":[91],"UIS-QA":[93],"(e.g.,":[94],"from":[95],"70.90":[96],"GAIA":[98],"46.70":[100],"BrowseComp-zh":[102],"to":[103],"24.55":[104],"UIS-QA),":[106],"underscoring":[107],"severity":[109],"of":[110,170],"problem.":[112],"mitigate":[114],"this,":[115],"propose":[117],"UIS-Digger,":[118],"novel":[120],"multi-agent":[121],"framework":[122],"that":[123],"incorporates":[124],"dual-mode":[125],"browsing":[126],"enables":[128],"simultaneous":[129],"webpage":[130],"searching":[131],"file":[133],"parsing.":[134],"With":[135],"relatively":[137],"small":[138],"$\\sim$30B-parameter":[139],"backbone":[140],"LLM":[141],"optimized":[142],"using":[143],"SFT":[144],"RFT":[146],"training":[147],"strategies,":[148],"UIS-Digger":[149],"sets":[150],"strong":[152],"baseline":[153],"at":[154],"27.27\\%,":[155],"outperforming":[156],"systems":[157],"integrating":[158],"sophisticated":[159],"LLMs":[160],"O3":[163],"GPT-4.1.":[165],"demonstrates":[167],"importance":[169],"proactive":[171],"interaction":[172],"with":[173],"unindexed":[174],"sources":[175],"for":[176,200,210],"effective":[177],"comprehensive":[179],"information-seeking.":[180],"Our":[181],"work":[182],"only":[184],"uncovers":[185],"fundamental":[187],"limitation":[188],"current":[190],"agent":[191],"evaluation":[192],"paradigms":[193],"but":[194],"also":[195],"provides":[196],"toolkit":[199],"advancing":[201],"research,":[203],"defining":[204],"new":[206],"promising":[208],"direction":[209],"robust":[211],"systems.":[213],"The":[214],"dataset":[215],"has":[216],"been":[217],"released":[218],"at:":[219],"https://huggingface.co/datasets/UIS-Digger/UIS-QA.":[220]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-11T00:00:00"}
