{"id":"https://openalex.org/W7139036837","doi":"https://doi.org/10.48550/arxiv.2603.16289","title":"VisBrowse-Bench: Benchmarking Visual-Native Search for Multimodal Browsing Agents","display_name":"VisBrowse-Bench: Benchmarking Visual-Native Search for Multimodal Browsing Agents","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7139036837","doi":"https://doi.org/10.48550/arxiv.2603.16289"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.16289","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16289","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.16289","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130188875","display_name":"Zhengbo Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhengbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126945690","display_name":"Jinbo Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Jinbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130184821","display_name":"Zhaowen Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Zhaowen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129828952","display_name":"Changtao Miao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao, Changtao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072021704","display_name":"Yuhan Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Yuhan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129992594","display_name":"Qimeng Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Qimeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129947781","display_name":"Yumeng Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yumeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129785631","display_name":"Feier Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Feier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130203626","display_name":"Yihe Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Yihe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129932079","display_name":"Yuhao Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Yuhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130018462","display_name":"Zitong Shan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shan, Zitong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Xia, Wanke","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Wanke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130103262","display_name":"Yi-Fan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yi-Fan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129919460","display_name":"Bo Zhang (6559)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Bo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129765638","display_name":"Zhe Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129766905","display_name":"Shiming Xiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang, Shiming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129879419","display_name":"Ying Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Ying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.00279999990016222,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0005000000237487257,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6312999725341797},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6251000165939331},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5662000179290771},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5593000054359436},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4846000075340271},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.412200003862381}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8314999938011169},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6312999725341797},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6251000165939331},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5662000179290771},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5593000054359436},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4846000075340271},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4803999960422516},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.412200003862381},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38600000739097595},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3684999942779541},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3522999882698059},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3027999997138977},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.29789999127388},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2978000044822693},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.28859999775886536},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.2694999873638153},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.16289","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16289","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.16289","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.16289","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,173],"rapid":[1],"advancement":[2],"of":[3,33,40,44,158,171],"Multimodal":[4],"Large":[5],"Language":[6],"Models":[7],"(MLLMs)":[8],"has":[9],"enabled":[10],"browsing":[11,120],"agents":[12],"to":[13,122],"acquire":[14],"and":[15,37,72,90,104,125,139,175],"reason":[16,126],"over":[17,127],"multimodal":[18,84],"information":[19,43,129],"in":[20,47,142],"the":[21,38,48,74,80,119,131,150,161],"real":[22],"world.":[23],"But":[24],"existing":[25],"benchmarks":[26],"suffer":[27],"from":[28],"two":[29],"limitations:":[30],"insufficient":[31],"evaluation":[32],"visual":[34,42,76,128],"reasoning":[35,49,77],"ability":[36],"neglect":[39],"native":[41],"web":[45],"pages":[46],"chains.":[50],"To":[51],"address":[52],"these":[53],"challenges,":[54],"we":[55],"introduce":[56],"a":[57,101],"new":[58],"benchmark":[59],"for":[60],"visual-native":[61],"search,":[62],"VisBrowse-Bench.":[63],"It":[64],"contains":[65],"169":[66],"VQA":[67],"instances":[68],"covering":[69],"multiple":[70],"domains":[71],"evaluates":[73],"models'":[75],"capabilities":[78],"during":[79,130],"search":[81,132],"process":[82],"through":[83],"evidence":[85],"cross-validation":[86],"via":[87],"text-image":[88],"retrieval":[89],"joint":[91],"reasoning.":[92],"These":[93],"data":[94,176],"were":[95],"constructed":[96],"by":[97],"human":[98],"experts":[99],"using":[100],"multi-stage":[102],"pipeline":[103],"underwent":[105],"rigorous":[106],"manual":[107],"verification.":[108],"We":[109,134],"additionally":[110],"propose":[111],"an":[112,156,169],"agent":[113,121],"workflow":[114],"that":[115,148],"can":[116,177],"effectively":[117],"drive":[118],"actively":[123],"collect":[124],"process.":[133],"comprehensively":[135],"evaluated":[136],"both":[137],"open-source":[138],"closed-source":[140],"models":[141],"this":[143],"workflow.":[144],"Experimental":[145],"results":[146],"show":[147],"even":[149],"best-performing":[151],"model,":[152,165],"Claude-4.6-Opus":[153],"only":[154,167],"achieves":[155,168],"accuracy":[157,170],"47.6%,":[159],"while":[160],"proprietary":[162],"Deep":[163],"Research":[164],"o3-deep-research":[166],"41.1%.":[172],"code":[174],"be":[178],"accessed":[179],"at:":[180],"https://github.com/ZhengboZhang/VisBrowse-Bench":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-20T00:00:00"}
