{"id":"https://openalex.org/W7156088772","doi":"https://doi.org/10.48550/arxiv.2604.22294","title":"Contexts are Never Long Enough: Structured Reasoning for Scalable Question Answering over Long Document Sets","display_name":"Contexts are Never Long Enough: Structured Reasoning for Scalable Question Answering over Long Document Sets","publication_year":2026,"publication_date":"2026-04-24","ids":{"openalex":"https://openalex.org/W7156088772","doi":"https://doi.org/10.48550/arxiv.2604.22294"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.22294","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22294","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.22294","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134691815","display_name":"Harshit Joshi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Joshi, Harshit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134721110","display_name":"Priyank Shethia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shethia, Priyank","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114988566","display_name":"Jadelynn Dao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dao, Jadelynn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134704348","display_name":"Monica S. Lam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lam, Monica S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5134691815"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.2736000120639801,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.2736000120639801,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.20669999718666077,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.13729999959468842,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6931999921798706},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6395000219345093},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.631600022315979},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6136000156402588},{"id":"https://openalex.org/keywords/workaround","display_name":"Workaround","score":0.46050000190734863},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.44200000166893005},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4404999911785126},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.43639999628067017},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4036000072956085},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.40070000290870667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8461999893188477},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6931999921798706},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6459000110626221},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6395000219345093},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.631600022315979},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6136000156402588},{"id":"https://openalex.org/C194541083","wikidata":"https://www.wikidata.org/wiki/Q457174","display_name":"Workaround","level":2,"score":0.46050000190734863},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.44200000166893005},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4404999911785126},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4036000072956085},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.40070000290870667},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.39340001344680786},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3776000142097473},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.3409999907016754},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33820000290870667},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.3312999904155731},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32510000467300415},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.3230000138282776},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C154690210","wikidata":"https://www.wikidata.org/wiki/Q1668499","display_name":"Rewriting","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.29339998960494995},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.28769999742507935},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2822999954223633},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2818000018596649},{"id":"https://openalex.org/C110326360","wikidata":"https://www.wikidata.org/wiki/Q17149476","display_name":"Metadata modeling","level":4,"score":0.2815999984741211},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C105888452","wikidata":"https://www.wikidata.org/wiki/Q7565148","display_name":"Source document","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2605000138282776},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.25189998745918274},{"id":"https://openalex.org/C2777826928","wikidata":"https://www.wikidata.org/wiki/Q3745713","display_name":"Fingerprint (computing)","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.22294","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22294","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.22294","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.22294","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Real-world":[0],"document":[1,29,83],"question":[2,79],"answering":[3,80],"is":[4,35],"challenging.":[5],"Analysts":[6],"must":[7,60],"synthesize":[8],"evidence":[9],"across":[10],"multiple":[11],"documents":[12,38],"and":[13,41,63,128,132,136,178,187],"different":[14],"parts":[15],"of":[16,56,70,150,157],"each":[17],"document.":[18],"However,":[19],"any":[20],"fixed":[21],"LLM":[22],"context":[23,155],"window":[24,156],"can":[25],"be":[26],"exceeded":[27],"as":[28,53],"collections":[30,84],"grow.":[31],"A":[32],"common":[33],"workaround":[34],"to":[36,130],"decompose":[37],"into":[39,92],"chunks":[40,57],"assemble":[42],"answers":[43],"from":[44],"chunk-level":[45],"outputs,":[46],"but":[47],"this":[48,111],"introduces":[49,118],"an":[50,66],"aggregation":[51],"bottleneck:":[52],"the":[54,154,172],"number":[55],"grows,":[58],"systems":[59],"still":[61],"combine":[62],"reason":[64],"over":[65,81,99,171],"increasingly":[67],"large":[68],"body":[69],"extracted":[71,113],"evidence.":[72],"We":[73],"present":[74],"SLIDERS,":[75],"a":[76,93,119],"framework":[77],"for":[78],"long":[82],"through":[85],"structured":[86,101],"reasoning.":[87],"SLIDERS":[88,117,139],"extracts":[89],"salient":[90],"information":[91],"relational":[94],"database,":[95],"enabling":[96],"scalable":[97],"reasoning":[98],"persistent":[100],"state":[102],"via":[103],"SQL":[104],"rather":[105],"than":[106],"concatenated":[107],"text.":[108],"To":[109],"make":[110],"locally":[112],"representation":[114],"globally":[115],"coherent,":[116],"data":[120],"reconciliation":[121],"stage":[122],"that":[123],"leverages":[124],"provenance,":[125],"extraction":[126],"rationales,":[127],"metadata":[129],"detect":[131],"repair":[133],"duplicated,":[134],"inconsistent,":[135],"incomplete":[137],"records.":[138],"outperforms":[140],"all":[141,149],"baselines":[142],"on":[143,166,181],"three":[144],"existing":[145],"long-context":[146],"benchmarks,":[147],"despite":[148],"them":[151],"fitting":[152],"within":[153],"strong":[158],"base":[159],"LLMs,":[160],"exceeding":[161],"GPT-4.1":[162],"by":[163,176],"6.6":[164],"points":[165,180],"average.":[167],"It":[168],"also":[169],"improves":[170],"next":[173],"best":[174],"baseline":[175],"~19":[177],"~32":[179],"two":[182],"new":[183],"benchmarks":[184],"at":[185],"3.9M":[186],"36M":[188],"tokens,":[189],"respectively.":[190]},"counts_by_year":[],"updated_date":"2026-04-28T06:12:00.211691","created_date":"2026-04-28T00:00:00"}
