{"id":"https://openalex.org/W7134847331","doi":"https://doi.org/10.48550/arxiv.2603.08655","title":"OfficeQA Pro: An Enterprise Benchmark for End-to-End Grounded Reasoning","display_name":"OfficeQA Pro: An Enterprise Benchmark for End-to-End Grounded Reasoning","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134847331","doi":"https://doi.org/10.48550/arxiv.2603.08655"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08655","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5099230650","display_name":"Krista Opsahl-Ong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Opsahl-Ong, Krista","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104180027","display_name":"Arnav Singhvi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singhvi, Arnav","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128684825","display_name":"Jasmine Collins","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Collins, Jasmine","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128276813","display_name":"Ivan Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Ivan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128667003","display_name":"Cindy Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Cindy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083242933","display_name":"Ashutosh Baheti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Baheti, Ashutosh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095045747","display_name":"Owen Oertell","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oertell, Owen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051310825","display_name":"Jacob Portes","orcid":"https://orcid.org/0000-0003-3102-012X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Portes, Jacob","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109658192","display_name":"Sam Havens","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Havens, Sam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009345506","display_name":"Erich Elsen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Elsen, Erich","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032248436","display_name":"Michael Bendersky","orcid":"https://orcid.org/0000-0002-2941-6240"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bendersky, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128674194","display_name":"Matei Zaharia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zaharia, Matei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128659581","display_name":"Xing Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Xing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5099230650"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.784500002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.784500002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.03280000016093254,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.024399999529123306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5914000272750854},{"id":"https://openalex.org/keywords/treasury","display_name":"Treasury","score":0.5116999745368958},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4593000113964081},{"id":"https://openalex.org/keywords/parametric-statistics","display_name":"Parametric statistics","score":0.4440999925136566},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.427700012922287},{"id":"https://openalex.org/keywords/case-based-reasoning","display_name":"Case-based reasoning","score":0.4007999897003174}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6640999913215637},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5914000272750854},{"id":"https://openalex.org/C2780889827","wikidata":"https://www.wikidata.org/wiki/Q10756188","display_name":"Treasury","level":2,"score":0.5116999745368958},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4593000113964081},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.453900009393692},{"id":"https://openalex.org/C117251300","wikidata":"https://www.wikidata.org/wiki/Q1849855","display_name":"Parametric statistics","level":2,"score":0.4440999925136566},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.427700012922287},{"id":"https://openalex.org/C20162079","wikidata":"https://www.wikidata.org/wiki/Q1151406","display_name":"Case-based reasoning","level":2,"score":0.4007999897003174},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39629998803138733},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.37709999084472656},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.37070000171661377},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32580000162124634},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3156999945640564},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2678000032901764},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.25529998540878296}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08655","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08655","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08655","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08655","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,118,141],"introduce":[1],"OfficeQA":[2,41,81],"Pro,":[3],"a":[4,15,124,133],"benchmark":[5],"for":[6],"evaluating":[7],"AI":[8],"agents":[9,106,122,168],"on":[10,80,85,109,116,159],"grounded,":[11],"multi-document":[12],"reasoning":[13,55],"over":[14,36,110],"large":[16],"and":[17,35,53,60,70,88,156],"heterogeneous":[18],"document":[19,50,103,126],"corpus.":[20],"The":[21],"corpus":[22],"consists":[23,43],"of":[24,44,112,149],"U.S.":[25],"Treasury":[26],"Bulletins":[27],"spanning":[28],"nearly":[29],"100":[30],"years,":[31],"comprising":[32],"89,000":[33],"pages":[34],"26":[37],"million":[38],"numerical":[39],"values.":[40],"Pro":[42,73,82],"133":[45],"questions":[46],"that":[47,120],"require":[48],"precise":[49],"parsing,":[51],"retrieval,":[52],"analytical":[54],"across":[56,139],"both":[57],"unstructured":[58],"text":[59],"tabular":[61],"data.":[62],"Frontier":[63],"LLMs":[64],"including":[65],"Claude":[66],"Opus":[67],"4.6,":[68],"GPT-5.4,":[69],"Gemini":[71],"3.1":[72],"Preview":[74],"achieve":[75],"less":[76,89],"than":[77,90],"5%":[78],"accuracy":[79],"when":[83],"relying":[84],"parametric":[86],"knowledge,":[87],"12%":[91],"with":[92,101,123],"additional":[93,143],"access":[94],"to":[95,145],"the":[96,102,147],"web.":[97],"When":[98],"provided":[99],"directly":[100],"corpus,":[104],"frontier":[105],"still":[107],"struggle":[108],"half":[111],"questions,":[113],"scoring":[114],"34.1%":[115],"average.":[117],"find":[119],"providing":[121],"structured":[125],"representation":[127],"produced":[128],"by":[129],"Databricks'":[130],"ai_parse_document":[131],"yields":[132],"16.1%":[134],"average":[135],"relative":[136],"performance":[137],"gain":[138],"agents.":[140],"conduct":[142],"ablations":[144],"study":[146],"effects":[148],"model":[150],"selection,":[151],"table":[152],"representation,":[153],"retrieval":[154],"strategy,":[155],"test-time":[157],"scaling":[158],"performance.":[160],"Despite":[161],"these":[162],"improvements,":[163],"significant":[164],"headroom":[165],"remains":[166],"before":[167],"can":[169],"be":[170],"considered":[171],"reliable":[172],"at":[173],"enterprise-grade":[174],"grounded":[175],"reasoning.":[176]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-11T00:00:00"}
