{"id":"https://openalex.org/W7148290952","doi":"https://doi.org/10.48550/arxiv.2604.01221","title":"HippoCamp: Benchmarking Contextual Agents on Personal Computers","display_name":"HippoCamp: Benchmarking Contextual Agents on Personal Computers","publication_year":2026,"publication_date":"2026-04-01","ids":{"openalex":"https://openalex.org/W7148290952","doi":"https://doi.org/10.48550/arxiv.2604.01221"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.01221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.01221","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132812164","display_name":"Zhe Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Zhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132733462","display_name":"Shulin Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Shulin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109778666","display_name":"Kairui Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Kairui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132829971","display_name":"Shuai Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Shuai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126920862","display_name":"Hoang-Nhat Nguyen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nguyen, Hoang-Nhat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132810434","display_name":"Yichi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yichi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072395419","display_name":"Zujin Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Zujin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132789643","display_name":"Mengying Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Mengying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132786757","display_name":"Zinan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zinan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132822884","display_name":"Jingkang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jingkang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132792922","display_name":"Chen Change Loy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Loy, Chen Change","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132808679","display_name":"Ziwei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziwei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5132812164"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.6991999745368958,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12607","display_name":"Personal Information Management and User Behavior","score":0.6991999745368958,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.04690000042319298,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.020600000396370888,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8019999861717224},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6535999774932861},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5569000244140625},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5286999940872192},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4553999900817871},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.3846000134944916},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.37229999899864197},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3425000011920929}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8019999861717224},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019000291824341},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6535999774932861},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5569000244140625},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5286999940872192},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4553999900817871},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39100000262260437},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.3846000134944916},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.37229999899864197},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3425000011920929},{"id":"https://openalex.org/C132964779","wikidata":"https://www.wikidata.org/wiki/Q2110223","display_name":"Raw data","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.33340001106262207},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31369999051094055},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.29670000076293945},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.29420000314712524},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.28679999709129333},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.27230000495910645},{"id":"https://openalex.org/C67712803","wikidata":"https://www.wikidata.org/wiki/Q7901853","display_name":"User modeling","level":3,"score":0.26080000400543213},{"id":"https://openalex.org/C89505385","wikidata":"https://www.wikidata.org/wiki/Q47146","display_name":"User interface","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2558000087738037},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.01221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.01221","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01221","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,111],"present":[1],"HippoCamp,":[2],"a":[3,113,132,192],"new":[4],"benchmark":[5,54],"designed":[6],"to":[7,40,85],"evaluate":[8,112],"agents'":[9,87],"capabilities":[10,88],"on":[11,21,126],"multimodal":[12,118,168],"file":[13,57,160],"management.":[14],"Unlike":[15],"existing":[16],"agent":[17],"benchmarks":[18],"that":[19],"focus":[20],"tasks":[22],"like":[23],"web":[24],"interaction,":[25],"tool":[26],"use,":[27],"or":[28],"software":[29],"automation":[30],"in":[31,37,89,146,186],"generic":[32],"settings,":[33],"HippoCamp":[34,178],"evaluates":[35],"agents":[36,185],"user-centric":[38,188],"environments":[39,189],"model":[41],"individual":[42],"user":[43,147],"profiles":[44,61],"and":[45,93,123,154,170,190],"search":[46],"massive":[47],"personal":[48,159,198],"files":[49],"for":[50,107,195],"context-aware":[51],"reasoning.":[52,95],"Our":[53,128],"instantiates":[55],"device-scale":[56],"systems":[58],"over":[59,71],"real-world":[60,73],"spanning":[62],"diverse":[63],"modalities,":[64],"comprising":[65],"42.4":[66],"GB":[67],"of":[68,116,183],"data":[69],"across":[70],"2K":[72],"files.":[74],"Building":[75],"upon":[76],"the":[77,137,174,180],"raw":[78],"files,":[79],"we":[80,100],"construct":[81],"581":[82],"QA":[83],"pairs":[84],"assess":[86],"search,":[90],"evidence":[91,171],"perception,":[92],"multi-step":[94],"To":[96],"facilitate":[97],"fine-grained":[98],"analysis,":[99],"provide":[101],"46.1K":[102],"densely":[103],"annotated":[104],"structured":[105],"trajectories":[106],"step-wise":[108,164],"failure":[109,165],"diagnosis.":[110],"wide":[114],"range":[115],"state-of-the-art":[117],"large":[119],"language":[120],"models":[121,141],"(MLLMs)":[122],"agentic":[124],"methods":[125],"HippoCamp.":[127],"comprehensive":[129],"experiments":[130],"reveal":[131],"significant":[133],"performance":[134],"gap:":[135],"even":[136],"most":[138],"advanced":[139],"commercial":[140],"achieve":[142],"only":[143],"48.3%":[144],"accuracy":[145],"profiling,":[148],"struggling":[149],"particularly":[150],"with":[151],"long-horizon":[152],"retrieval":[153],"cross-modal":[155],"reasoning":[156],"within":[157],"dense":[158],"systems.":[161],"Furthermore,":[162],"our":[163],"diagnosis":[166],"identifies":[167],"perception":[169],"grounding":[172],"as":[173],"primary":[175],"bottlenecks.":[176],"Ultimately,":[177],"exposes":[179],"critical":[181],"limitations":[182],"current":[184],"realistic,":[187],"provides":[191],"robust":[193],"foundation":[194],"developing":[196],"next-generation":[197],"AI":[199],"assistants.":[200]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
