{"id":"https://openalex.org/W7115577572","doi":"https://doi.org/10.48550/arxiv.2512.10996","title":"MedBioRAG: Semantic Search and Retrieval-Augmented Generation with Large Language Models for Medical and Biological QA","display_name":"MedBioRAG: Semantic Search and Retrieval-Augmented Generation with Large Language Models for Medical and Biological QA","publication_year":2025,"publication_date":"2025-12-10","ids":{"openalex":"https://openalex.org/W7115577572","doi":"https://doi.org/10.48550/arxiv.2512.10996"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.10996","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.10996","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.10996","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kim, Seonok","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kim, Seonok","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.45159998536109924,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.45159998536109924,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3522999882698059,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.03669999912381172,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6295999884605408},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6194000244140625},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.4823000133037567},{"id":"https://openalex.org/keywords/unified-medical-language-system","display_name":"Unified Medical Language System","score":0.4717999994754791},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.3874000012874603},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.37229999899864197},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.33660000562667847},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.3215000033378601}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8325999975204468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6603000164031982},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6295999884605408},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6212000250816345},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6194000244140625},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.4823000133037567},{"id":"https://openalex.org/C69505689","wikidata":"https://www.wikidata.org/wiki/Q455338","display_name":"Unified Medical Language System","level":2,"score":0.4717999994754791},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47029998898506165},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.3874000012874603},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3749000132083893},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.37229999899864197},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.33660000562667847},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.3215000033378601},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C166423231","wikidata":"https://www.wikidata.org/wiki/Q1891170","display_name":"Semantic search","level":3,"score":0.2687999904155731},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2639000117778778},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.26159998774528503},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.25189998745918274}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.10996","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.10996","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.10996","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.10996","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8002620935440063,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,102,121,127,142],"retrieval-augmented":[3,29],"generation":[4],"(RAG)":[5],"have":[6],"significantly":[7],"enhanced":[8],"the":[9,98,133],"ability":[10],"of":[11,40,135],"large":[12],"language":[13],"models":[14,96],"(LLMs)":[15],"to":[16,32],"perform":[17],"complex":[18],"question-answering":[19],"(QA)":[20],"tasks.":[21,105],"In":[22],"this":[23],"paper,":[24],"we":[25],"introduce":[26],"MedBioRAG,":[27],"a":[28,38],"model":[30,101],"designed":[31],"improve":[33],"biomedical":[34,56,143],"QA":[35,74,123],"performance":[36],"through":[37],"combination":[39],"semantic":[41,136],"and":[42,47,53,60,72,85,97,111,124,139],"lexical":[43],"search,":[44],"document":[45,115],"retrieval,":[46,69,116],"supervised":[48],"fine-tuning.":[49],"MedBioRAG":[50,66,91],"efficiently":[51],"retrieves":[52],"ranks":[54],"relevant":[55],"documents,":[57],"enabling":[58],"precise":[59],"context-aware":[61],"response":[62],"generation.":[63],"We":[64],"evaluate":[65],"across":[67],"text":[68],"close-ended":[70,122],"QA,":[71],"long-form":[73,128],"tasks":[75],"using":[76],"benchmark":[77],"datasets":[78],"such":[79],"as":[80],"NFCorpus,":[81],"TREC-COVID,":[82],"MedQA,":[83],"PubMedQA,":[84],"BioASQ.":[86],"Experimental":[87],"results":[88],"demonstrate":[89],"that":[90],"outperforms":[92],"previous":[93],"state-of-the-art":[94],"(SoTA)":[95],"GPT-4o":[99],"base":[100],"all":[103],"evaluated":[104],"Notably,":[106],"our":[107],"approach":[108],"improves":[109],"NDCG":[110],"MRR":[112],"scores":[113,126],"for":[114],"while":[117],"achieving":[118],"higher":[119],"accuracy":[120],"ROUGE":[125],"QA.":[129],"Our":[130],"findings":[131],"highlight":[132],"effectiveness":[134],"search-based":[137],"retrieval":[138],"LLM":[140],"fine-tuning":[141],"applications.":[144]},"counts_by_year":[],"updated_date":"2025-12-16T23:48:00.217561","created_date":"2025-12-16T00:00:00"}
