{"id":"https://openalex.org/W7128055812","doi":"https://doi.org/10.48550/arxiv.2602.05014","title":"DeepRead: Document Structure-Aware Reasoning to Enhance Agentic Search","display_name":"DeepRead: Document Structure-Aware Reasoning to Enhance Agentic Search","publication_year":2026,"publication_date":"2026-02-04","ids":{"openalex":"https://openalex.org/W7128055812","doi":"https://doi.org/10.48550/arxiv.2602.05014"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.05014","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.05014","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.05014","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125199771","display_name":"Zhanli Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Zhanli","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125122398","display_name":"Huiwen Tian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian, Huiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112480770","display_name":"Lvzhou Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Lvzhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119967794","display_name":"Yixuan Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Yixuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125117188","display_name":"Ping Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Ping","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5125199771"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7681999802589417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7681999802589417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.11840000003576279,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.023099999874830246,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/operationalization","display_name":"Operationalization","score":0.6256999969482422},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.45399999618530273},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4528999924659729},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4212999939918518},{"id":"https://openalex.org/keywords/bridge","display_name":"Bridge (graph theory)","score":0.37619999051094055},{"id":"https://openalex.org/keywords/reading","display_name":"Reading (process)","score":0.328900009393692},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.3240000009536743}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7760000228881836},{"id":"https://openalex.org/C9354725","wikidata":"https://www.wikidata.org/wiki/Q286017","display_name":"Operationalization","level":2,"score":0.6256999969482422},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.45399999618530273},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4528999924659729},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4212999939918518},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40209999680519104},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.328900009393692},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3100999891757965},{"id":"https://openalex.org/C191015642","wikidata":"https://www.wikidata.org/wiki/Q1132459","display_name":"Fragmentation (computing)","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3012000024318695},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.29840001463890076},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C166088908","wikidata":"https://www.wikidata.org/wiki/Q308495","display_name":"Abductive reasoning","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.25769999623298645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.05014","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.05014","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.05014","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.05014","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.597186803817749,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,42,76,91,121,164],"rapid":[2],"advancement":[3],"of":[4,38,79,149,167],"tool-use":[5],"capabilities":[6],"in":[7,125,170],"Large":[8],"Language":[9],"Models":[10],"(LLMs),":[11],"Retrieval-Augmented":[12],"Generation":[13],"(RAG)":[14],"is":[15,177],"shifting":[16],"from":[17],"static,":[18],"one-shot":[19],"retrieval":[20,127],"toward":[21],"autonomous,":[22],"multi-turn":[23],"evidence":[24],"acquisition.":[25],"However,":[26],"existing":[27],"agentic":[28,143],"search":[29,144],"frameworks":[30],"typically":[31],"treat":[32],"long":[33],"documents":[34],"as":[35],"flat":[36],"collections":[37],"unstructured":[39],"chunks,":[40],"disregarding":[41],"native":[43],"hierarchical":[44,109],"organization":[45],"and":[46,89,101],"sequential":[47],"logic":[48],"essential":[49],"for":[50,98,103],"human":[51],"comprehension.":[52],"To":[53],"bridge":[54],"this":[55],"gap,":[56],"we":[57],"introduce":[58],"\\textbf{DeepRead},":[59],"a":[60,84,114],"structure-aware":[61],"document":[62,136,173],"reasoning":[63,73,117],"agent":[64],"designed":[65],"to":[66],"operationalize":[67],"document-native":[68],"structural":[69,77,168],"priors":[70],"into":[71],"actionable":[72],"capabilities.":[74],"Leveraging":[75],"fidelity":[78],"modern":[80],"OCR,":[81],"DeepRead":[82,140,157],"constructs":[83],"paragraph-level,":[85],"coordinate-based":[86],"navigation":[87],"system":[88],"equips":[90],"LLM":[92],"with":[93],"two":[94],"synergistic":[95],"tools:":[96],"\\textsf{Retrieve}":[97],"scanning-aware":[99],"localization,":[100],"\\textsf{ReadSection}":[102],"contiguous,":[104],"order-preserving":[105],"reading":[106,161],"within":[107],"specific":[108],"scopes.":[110],"This":[111],"design":[112],"elicits":[113],"human-like":[115],"``locate-then-read''":[116],"paradigm,":[118],"effectively":[119],"mitigating":[120],"context":[122],"fragmentation":[123],"inherent":[124],"traditional":[126],"methods.":[128],"Extensive":[129],"evaluations":[130],"across":[131],"four":[132],"benchmarks":[133],"spanning":[134],"diverse":[135],"types":[137],"demonstrate":[138],"that":[139,156],"outperforms":[141],"Search-o1-style":[142],"baselines":[145],"by":[146],"an":[147],"average":[148],"10.3\\%.":[150],"Fine-grained":[151],"behavioral":[152],"analysis":[153],"further":[154],"confirms":[155],"autonomously":[158],"adopts":[159],"human-aligned":[160],"strategies,":[162],"validating":[163],"critical":[165],"role":[166],"awareness":[169],"achieving":[171],"precise":[172],"reasoning.":[174],"Our":[175],"code":[176],"available":[178],"at":[179],"https://github.com/Zhanli-Li/DeepRead.":[180]},"counts_by_year":[],"updated_date":"2026-02-14T06:23:00.392402","created_date":"2026-02-07T00:00:00"}
