{"id":"https://openalex.org/W7155235666","doi":"https://doi.org/10.48550/arxiv.2604.19281","title":"Beyond Semantic Similarity: A Component-Wise Evaluation Framework for Medical Question Answering Systems with Health Equity Implications","display_name":"Beyond Semantic Similarity: A Component-Wise Evaluation Framework for Medical Question Answering Systems with Health Equity Implications","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155235666","doi":"https://doi.org/10.48550/arxiv.2604.19281"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.19281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.19281","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076388891","display_name":"Abu Noman Sakib","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sakib, Abu Noman Md","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016042501","display_name":"Md. Main Oddin Chisty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chisty, Md. Main Oddin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100673890","display_name":"Zijie Zhang","orcid":"https://orcid.org/0000-0003-1254-098X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5076388891"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.39660000801086426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.39660000801086426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.24950000643730164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.22750000655651093,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/equity","display_name":"Equity (law)","score":0.5138000249862671},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5001999735832214},{"id":"https://openalex.org/keywords/completeness","display_name":"Completeness (order theory)","score":0.47429999709129333},{"id":"https://openalex.org/keywords/public-health","display_name":"Public health","score":0.4507000148296356},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3605000078678131},{"id":"https://openalex.org/keywords/health-equity","display_name":"Health equity","score":0.3490999937057495}],"concepts":[{"id":"https://openalex.org/C199728807","wikidata":"https://www.wikidata.org/wiki/Q2578557","display_name":"Equity (law)","level":2,"score":0.5138000249862671},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5087000131607056},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5001999735832214},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.47429999709129333},{"id":"https://openalex.org/C138816342","wikidata":"https://www.wikidata.org/wiki/Q189603","display_name":"Public health","level":2,"score":0.4507000148296356},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.4106000065803528},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4011000096797943},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3605000078678131},{"id":"https://openalex.org/C2250968","wikidata":"https://www.wikidata.org/wiki/Q1512929","display_name":"Health equity","level":3,"score":0.3490999937057495},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.3305000066757202},{"id":"https://openalex.org/C3019150057","wikidata":"https://www.wikidata.org/wiki/Q92779279","display_name":"Medical information","level":2,"score":0.32199999690055847},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3111000061035156},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2964000105857849},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C162118730","wikidata":"https://www.wikidata.org/wiki/Q1128453","display_name":"Actuarial science","level":1,"score":0.28949999809265137},{"id":"https://openalex.org/C145642194","wikidata":"https://www.wikidata.org/wiki/Q870895","display_name":"Health informatics","level":3,"score":0.28630000352859497},{"id":"https://openalex.org/C69505689","wikidata":"https://www.wikidata.org/wiki/Q455338","display_name":"Unified Medical Language System","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.19281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.19281","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19281","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.5594663619995117,"id":"https://metadata.un.org/sdg/10"},{"display_name":"Peace, Justice and strong institutions","score":0.42170271277427673,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"use":[1],"of":[2,20,29,52,58,89,93,112,115,151,154,161,187,221,256,265],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"to":[7,25,195,207],"support":[8],"patients":[9],"in":[10,32,212,248],"addressing":[11],"medical":[12,55,77,105,250,266],"questions":[13],"is":[14,261],"becoming":[15],"increasingly":[16],"prevalent.":[17],"However,":[18],"most":[19,186],"the":[21,27,53,59,90,113,143,152,188,201,219,254],"measures":[22],"currently":[23],"used":[24,120],"evaluate":[26],"performance":[28,114,153,167,178,193],"these":[30,68,245],"models":[31,157,189,246],"this":[33],"context":[34],"only":[35],"measure":[36,264],"how":[37,244],"closely":[38],"a":[39,49,72,86,139,262],"model's":[40,54],"answers":[41],"match":[42],"semantically,":[43],"and":[44,100,118,146,214,252],"therefore":[45],"do":[46],"not":[47,237],"provide":[48],"true":[50],"indication":[51],"accuracy":[56],"or":[57],"health":[60,183,203],"equity":[61],"risks":[62],"associated":[63],"with":[64,185],"it.":[65],"To":[66],"address":[67],"shortcomings,":[69],"we":[70,137],"present":[71],"new":[73],"evaluation":[74,88,259],"framework":[75],"for":[76,104,199,239],"question":[78,255],"answering":[79],"called":[80],"VB-Score":[81],"(Verification-Based":[82],"Score)":[83],"that":[84,159,205,210,232],"provides":[85],"separate":[87],"four":[91],"components":[92],"entity":[94,147],"recognition,":[95],"semantic":[96,145,258],"similarity,":[97],"factual":[98],"consistency,":[99],"structured":[101],"information":[102,131],"completeness":[103],"question-answering":[106],"models.":[107],"We":[108],"perform":[109,247],"rigorous":[110],"reviews":[111],"three":[116,156],"well-known":[117],"widely":[119],"LLMs":[121],"on":[122,134,243],"48":[123],"public":[124,182,202],"health-related":[125],"topics":[126,204],"taken":[127],"from":[128],"high-quality,":[129],"authoritative":[130],"sources.":[132],"Based":[133],"our":[135,172],"analyses,":[136],"discover":[138],"major":[140],"discrepancy":[141],"between":[142],"models'":[144],"accuracy.":[148],"Our":[149,174,228],"assessments":[150],"all":[155,200],"show":[158],"each":[160],"them":[162],"has":[163],"almost":[164],"uniformly":[165],"severe":[166],"failures":[168],"when":[169],"evaluated":[170],"against":[171],"criteria.":[173],"findings":[175,229],"indicate":[176],"alarming":[177],"disparities":[179],"across":[180],"various":[181],"topics,":[184],"exhibiting":[190],"13.8%":[191],"lower":[192],"(compared":[194],"an":[196],"overall":[197],"average)":[198],"relate":[206],"chronic":[208],"conditions":[209],"occur":[211],"older":[213],"minority":[215],"populations,":[216],"which":[217],"indicates":[218],"existence":[220],"what's":[222],"known":[223],"as":[224],"condition-based":[225],"algorithmic":[226],"discrimination.":[227],"also":[230],"demonstrate":[231],"prompt":[233],"engineering":[234],"alone":[235,260],"does":[236],"compensate":[238],"basic":[240],"architectural":[241],"limitations":[242],"extracting":[249],"entities":[251],"raise":[253],"whether":[257],"sufficient":[263],"AI":[267],"safety.":[268]},"counts_by_year":[],"updated_date":"2026-04-23T06:20:18.424754","created_date":"2026-04-23T00:00:00"}
