{"id":"https://openalex.org/W7125235132","doi":"https://doi.org/10.48550/arxiv.2601.13268","title":"Improving the Safety and Trustworthiness of Medical AI via Multi-Agent Evaluation Loops","display_name":"Improving the Safety and Trustworthiness of Medical AI via Multi-Agent Evaluation Loops","publication_year":2026,"publication_date":"2026-01-19","ids":{"openalex":"https://openalex.org/W7125235132","doi":"https://doi.org/10.48550/arxiv.2601.13268"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.13268","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.13268","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.13268","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123524643","display_name":"Zainab Ghafoor","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ghafoor, Zainab","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123457377","display_name":"Md. Shafiqul Islam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Islam, Md Shafiqul","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058742577","display_name":"Koushik Chandra Howlader","orcid":"https://orcid.org/0000-0001-9392-9801"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Howlader, Koushik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123468587","display_name":"Md Rasel Khondokar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khondokar, Md Rasel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123535923","display_name":"Tanusree Bhattacharjee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhattacharjee, Tanusree","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101727280","display_name":"Sayantan Chakraborty","orcid":"https://orcid.org/0000-0002-7246-0589"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakraborty, Sayantan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123460529","display_name":"Adrito Roy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roy, Adrito","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bhattacharjee, Ushashi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bhattacharjee, Ushashi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5114201231","display_name":"Tirtho Roy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roy, Tirtho","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5123524643"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.6929000020027161,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.6929000020027161,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.13410000503063202,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.041200000792741776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/downgrade","display_name":"Downgrade","score":0.6039000153541565},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5260000228881836},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.5238000154495239},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.42969998717308044},{"id":"https://openalex.org/keywords/risk-assessment","display_name":"Risk assessment","score":0.428600013256073},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.3995000123977661},{"id":"https://openalex.org/keywords/compliance","display_name":"Compliance (psychology)","score":0.36390000581741333},{"id":"https://openalex.org/keywords/medical-ethics","display_name":"Medical ethics","score":0.3443000018596649}],"concepts":[{"id":"https://openalex.org/C2779628075","wikidata":"https://www.wikidata.org/wiki/Q1253258","display_name":"Downgrade","level":2,"score":0.6039000153541565},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5415999889373779},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5260000228881836},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.5238000154495239},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.5232999920845032},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.42969998717308044},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3995000123977661},{"id":"https://openalex.org/C2781460075","wikidata":"https://www.wikidata.org/wiki/Q1399332","display_name":"Compliance (psychology)","level":2,"score":0.36390000581741333},{"id":"https://openalex.org/C126086293","wikidata":"https://www.wikidata.org/wiki/Q237151","display_name":"Medical ethics","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.3402999937534332},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.33390000462532043},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3328999876976013},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.320499986410141},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.3034000098705292},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C2993880109","wikidata":"https://www.wikidata.org/wiki/Q1163564","display_name":"Medical screening","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25870001316070557},{"id":"https://openalex.org/C2778850959","wikidata":"https://www.wikidata.org/wiki/Q3275615","display_name":"Clinical engineering","level":3,"score":0.25690001249313354},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.13268","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.13268","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.13268","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.13268","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7027570605278015}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4],"increasingly":[5],"applied":[6],"in":[7,137],"healthcare,":[8],"yet":[9],"ensuring":[10],"their":[11],"ethical":[12,96,101,138],"integrity":[13],"and":[14,36,54,63,78,104,140,158],"safety":[15,35],"compliance":[16],"remains":[17],"a":[18,27,79,141,155],"major":[19],"barrier":[20],"to":[21,32],"clinical":[22],"deployment.":[23],"This":[24,152],"work":[25],"introduces":[26],"multi-agent":[28,131],"refinement":[29],"framework":[30],"designed":[31],"enhance":[33],"the":[34,69,147],"reliability":[37],"of":[38,75,126,149],"medical":[39,163],"LLMs":[40],"through":[41],"structured,":[42],"iterative":[43,130],"alignment.":[44],"Our":[45],"system":[46],"combines":[47],"two":[48,58],"generative":[49],"models":[50],"-":[51,56],"DeepSeek":[52,111],"R1":[53,112],"Med-PaLM":[55,122],"with":[57],"evaluation":[59],"agents,":[60],"LLaMA":[61],"3.1":[62],"Phi-4,":[64],"which":[65],"assess":[66],"responses":[67],"using":[68],"American":[70],"Medical":[71,76],"Association's":[72],"(AMA)":[73],"Principles":[74],"Ethics":[77],"five-tier":[80],"Safety":[81],"Risk":[82],"Assessment":[83],"(SRA-5)":[84],"protocol.":[85],"We":[86],"evaluate":[87],"performance":[88],"across":[89],"900":[90],"clinically":[91],"diverse":[92],"queries":[93],"spanning":[94],"nine":[95],"domains,":[97],"measuring":[98],"convergence":[99,115],"efficiency,":[100],"violation":[102],"reduction,":[103],"domain-specific":[105],"risk":[106,143],"behavior.":[107],"Results":[108],"demonstrate":[109],"that":[110],"achieves":[113],"faster":[114],"(mean":[116],"2.34":[117],"vs.":[118],"2.67":[119],"iterations),":[120],"while":[121],"shows":[123],"superior":[124],"handling":[125],"privacy-sensitive":[127],"scenarios.":[128],"The":[129],"loop":[132],"achieved":[133],"an":[134],"89%":[135],"reduction":[136],"violations":[139],"92%":[142],"downgrade":[144],"rate,":[145],"underscoring":[146],"effectiveness":[148],"our":[150],"approach.":[151],"study":[153],"presents":[154],"scalable,":[156],"regulator-aligned,":[157],"cost-efficient":[159],"paradigm":[160],"for":[161],"governing":[162],"AI":[164],"safety.":[165]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-01-22T00:00:00"}
