{"id":"https://openalex.org/W7140327230","doi":"https://doi.org/10.48550/arxiv.2603.22633","title":"Graph-Aware Late Chunking for Retrieval-Augmented Generation in Biomedical Literature","display_name":"Graph-Aware Late Chunking for Retrieval-Augmented Generation in Biomedical Literature","publication_year":2026,"publication_date":"2026-03-23","ids":{"openalex":"https://openalex.org/W7140327230","doi":"https://doi.org/10.48550/arxiv.2603.22633"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.22633","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22633","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.22633","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107631059","display_name":"Pouria Mortezaagha","orcid":"https://orcid.org/0009-0006-4768-6654"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mortezaagha, Pouria","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5006363388","display_name":"Arya Rahgozar","orcid":"https://orcid.org/0000-0002-2127-2449"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahgozar, Arya","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5107631059"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9205999970436096,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9205999970436096,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.027799999341368675,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.017999999225139618,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6029000282287598},{"id":"https://openalex.org/keywords/mean-reciprocal-rank","display_name":"Mean reciprocal rank","score":0.5999000072479248},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.5059999823570251},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5041000247001648},{"id":"https://openalex.org/keywords/unified-medical-language-system","display_name":"Unified Medical Language System","score":0.40700000524520874},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.3675000071525574},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.3391000032424927},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3357999920845032},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.3352000117301941}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7577000260353088},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6477000117301941},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6029000282287598},{"id":"https://openalex.org/C44083865","wikidata":"https://www.wikidata.org/wiki/Q3853443","display_name":"Mean reciprocal rank","level":2,"score":0.5999000072479248},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.5059999823570251},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5041000247001648},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44530001282691956},{"id":"https://openalex.org/C69505689","wikidata":"https://www.wikidata.org/wiki/Q455338","display_name":"Unified Medical Language System","level":2,"score":0.40700000524520874},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3675000071525574},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34709998965263367},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.3391000032424927},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3357999920845032},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.3352000117301941},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.3174999952316284},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3034999966621399},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C2777742833","wikidata":"https://www.wikidata.org/wiki/Q1964083","display_name":"Reciprocal","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26739999651908875},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26429998874664307},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.25369998812675476},{"id":"https://openalex.org/C2778775528","wikidata":"https://www.wikidata.org/wiki/Q5135432","display_name":"Closing (real estate)","level":2,"score":0.25099998712539673},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.25},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.22633","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22633","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.22633","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.22633","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7100555300712585,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Retrieval-Augmented":[0],"Generation":[1,147],"(RAG)":[2],"systems":[3],"for":[4,33,188],"biomedical":[5,189],"literature":[6],"are":[7],"typically":[8],"evaluated":[9],"using":[10,97],"ranking":[11,106],"metrics":[12,107,113,171],"like":[13],"Mean":[14],"Reciprocal":[15],"Rank":[16],"(MRR),":[17],"which":[18],"measure":[19],"how":[20],"well":[21],"the":[22,25,50,126,154,179],"system":[23],"identifies":[24],"single":[26,135],"most":[27],"relevant":[28],"chunk.":[29],"We":[30,61,87],"argue":[31],"that":[32,66,150,169,177],"full-text":[34],"scientific":[35],"documents,":[36],"this":[37],"paradigm":[38],"is":[39,183],"incomplete:":[40],"it":[41],"rewards":[42],"retrieval":[43,47,152,175],"precision":[44],"while":[45,137,161],"ignoring":[46],"breadth":[48],"--":[49],"ability":[51],"to":[52,143,157],"surface":[53],"evidence":[54],"from":[55,133,141],"across":[56],"a":[57,64,120,134,184],"document's":[58],"structural":[59,72,111,174],"sections.":[60,146],"propose":[62],"GraLC-RAG,":[63],"framework":[65],"unifies":[67],"late":[68],"chunking":[69],"with":[70],"graph-aware":[71],"intelligence,":[73],"introducing":[74],"structure-aware":[75,138],"chunk":[76],"boundary":[77],"detection,":[78],"UMLS":[79],"knowledge":[80],"graph":[81],"infusion,":[82],"and":[83,101,110,176],"graph-guided":[84],"hybrid":[85],"retrieval.":[86],"evaluate":[88],"six":[89],"strategies":[90],"on":[91],"2,359":[92],"IMRaD-filtered":[93],"PubMed":[94],"Central":[95],"articles":[96],"2,033":[98],"cross-section":[99],"questions":[100],"two":[102],"metric":[103],"families:":[104],"standard":[105,170],"(MRR,":[108],"Recall@k)":[109],"coverage":[112],"(SecCov@k,":[114],"CS":[115],"Recall).":[116],"Our":[117],"results":[118],"expose":[119],"sharp":[121],"divergence:":[122],"content-similarity":[123],"methods":[124,139],"achieve":[125],"highest":[127],"MRR":[128],"(0.517)":[129],"but":[130],"always":[131],"retrieve":[132,140],"section,":[136],"up":[142],"15.6x":[144],"more":[145],"experiments":[148],"show":[149],"KG-infused":[151],"narrows":[153],"answer-quality":[155],"gap":[156,182],"delta-F1":[158],"=":[159],"0.009":[160],"maintaining":[162],"4.6x":[163],"section":[164],"diversity.":[165],"These":[166],"findings":[167],"demonstrate":[168],"systematically":[172],"undervalue":[173],"closing":[178],"multi-section":[180],"synthesis":[181],"key":[185],"open":[186],"problem":[187],"RAG.":[190]},"counts_by_year":[],"updated_date":"2026-03-26T06:10:45.909354","created_date":"2026-03-26T00:00:00"}
