{"id":"https://openalex.org/W7162424985","doi":"https://doi.org/10.48550/arxiv.2605.25273","title":"LLM-as-a-Judge in Healthcare: A Scoping Analysis of Applications, Methods, and Human Alignment","display_name":"LLM-as-a-Judge in Healthcare: A Scoping Analysis of Applications, Methods, and Human Alignment","publication_year":2026,"publication_date":"2026-05-24","ids":{"openalex":"https://openalex.org/W7162424985","doi":"https://doi.org/10.48550/arxiv.2605.25273"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.25273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.25273","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137075288","display_name":"Lingyao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Lingyao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137036953","display_name":"Deyi Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Deyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137061525","display_name":"Chen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066977856","display_name":"Renkai Ma","orcid":"https://orcid.org/0000-0002-4434-2235"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Renkai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100767610","display_name":"Runlong Yu","orcid":"https://orcid.org/0000-0003-4080-2377"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Runlong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137013586","display_name":"Mingquan Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Mingquan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137065656","display_name":"Rui Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Rui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137016123","display_name":"Lizhou Fan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fan, Lizhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137014587","display_name":"Cathy Shyr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shyr, Cathy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137071740","display_name":"Siyuan Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Siyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137050272","display_name":"Mei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Mei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137079207","display_name":"Steven Bethard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bethard, Steven","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.4690999984741211,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.4690999984741211,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.3889999985694885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.059300001710653305,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.5888000130653381},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4918000102043152},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.448199987411499},{"id":"https://openalex.org/keywords/precision-medicine","display_name":"Precision medicine","score":0.3779999911785126},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.3652999997138977},{"id":"https://openalex.org/keywords/clinical-decision-support-system","display_name":"Clinical decision support system","score":0.34769999980926514},{"id":"https://openalex.org/keywords/clinical-decision-making","display_name":"Clinical decision making","score":0.34119999408721924},{"id":"https://openalex.org/keywords/health-technology","display_name":"Health technology","score":0.3353999853134155},{"id":"https://openalex.org/keywords/human-health","display_name":"Human health","score":0.31189998984336853}],"concepts":[{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.5888000130653381},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5529000163078308},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5159000158309937},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4918000102043152},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.448199987411499},{"id":"https://openalex.org/C163763905","wikidata":"https://www.wikidata.org/wiki/Q17075943","display_name":"Precision medicine","level":2,"score":0.3779999911785126},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.3652999997138977},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.364300012588501},{"id":"https://openalex.org/C63527458","wikidata":"https://www.wikidata.org/wiki/Q5133829","display_name":"Clinical decision support system","level":3,"score":0.34769999980926514},{"id":"https://openalex.org/C2989179672","wikidata":"https://www.wikidata.org/wiki/Q6806500","display_name":"Clinical decision making","level":2,"score":0.34119999408721924},{"id":"https://openalex.org/C21333345","wikidata":"https://www.wikidata.org/wiki/Q1519843","display_name":"Health technology","level":3,"score":0.3353999853134155},{"id":"https://openalex.org/C2987857752","wikidata":"https://www.wikidata.org/wiki/Q12147","display_name":"Human health","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.30799999833106995},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3018999993801117},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29919999837875366},{"id":"https://openalex.org/C191147762","wikidata":"https://www.wikidata.org/wiki/Q186289","display_name":"Human reliability","level":3,"score":0.2985000014305115},{"id":"https://openalex.org/C106977388","wikidata":"https://www.wikidata.org/wiki/Q2752427","display_name":"Medical research","level":2,"score":0.29649999737739563},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.29420000314712524},{"id":"https://openalex.org/C19648533","wikidata":"https://www.wikidata.org/wiki/Q691640","display_name":"Evidence-based medicine","level":3,"score":0.2937999963760376},{"id":"https://openalex.org/C107327155","wikidata":"https://www.wikidata.org/wiki/Q330268","display_name":"Decision support system","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.29120001196861267},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.28949999809265137},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C2988170871","wikidata":"https://www.wikidata.org/wiki/Q11000047","display_name":"Healthcare system","level":3,"score":0.2711000144481659},{"id":"https://openalex.org/C148482608","wikidata":"https://www.wikidata.org/wiki/Q17007018","display_name":"Clinical study design","level":3,"score":0.2556999921798706},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.25273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.25273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.25273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.48447123169898987,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,150],"models":[2,163],"(LLMs)":[3],"are":[4,22,129,164],"increasingly":[5,53],"deployed":[6],"across":[7,207],"healthcare":[8,62,85,220],"applications,":[9],"including":[10],"clinical":[11,25,56,145,148,227],"documentation,":[12],"diagnostic":[13],"reasoning,":[14],"medicine":[15],"recommendation,":[16],"and":[17,51,86,116,128,138,155,159,170,181,233],"medical":[18,153,160],"education.":[19],"Their":[20],"outputs":[21],"largely":[23],"unstructured":[24],"text,":[26],"which":[27,37],"is":[28,52,82,142],"difficult":[29],"to":[30,197],"reliably":[31],"evaluate":[32],"at":[33],"scale.":[34],"LLM-as-a-Judge,":[35],"in":[36,55,61,84,105,144,174],"an":[38],"LLM":[39,70,192],"evaluates":[40],"another":[41],"system's":[42],"output":[43],"against":[44],"task-specific":[45],"criteria,":[46],"offers":[47],"a":[48,98,215],"scalable":[49,219],"alternative":[50],"used":[54,168],"evaluation,":[57,222],"yet":[58],"its":[59,226],"validity":[60],"remains":[63],"underexamined.":[64],"Existing":[65],"reviews":[66],"focus":[67],"on":[68,73,230],"general-purpose":[69],"evaluation":[71],"or":[72],"risk":[74],"framework,":[75],"rather":[76],"than":[77],"systematically":[78],"characterizing":[79],"how":[80,87],"LLM-as-a-Judge":[81,103,141,213],"applied":[83],"well":[88],"their":[89],"judgments":[90],"align":[91],"with":[92,178,200],"human":[93,190],"experts.":[94],"We":[95],"therefore":[96],"conduct":[97],"PRISMA-guided":[99],"comprehensive":[100],"review":[101,211],"of":[102],"applications":[104],"healthcare,":[106],"searching":[107],"five":[108],"databases":[109],"for":[110,218],"studies":[111,124,188],"published":[112],"between":[113],"January":[114],"2023":[115],"February":[117],"2026.":[118],"After":[119],"screening":[120],"541":[121],"records,":[122],"134":[123],"meet":[125],"the":[126,165],"eligibility":[127],"coded":[130],"by":[131],"health":[132],"scenario,":[133],"judge":[134],"configuration,":[135],"technical":[136],"approach,":[137],"validation":[139],"design.":[140],"concentrated":[143],"decision":[146],"support,":[147],"natural":[149],"processing":[151],"(NLP),":[152],"knowledge":[154],"question":[156],"answering":[157],"(QA),":[158],"communication.":[161],"OpenAI":[162],"most":[166],"frequently":[167],"judges,":[169],"prompt":[171],"engineering":[172],"appears":[173],"nearly":[175],"all":[176],"studies,":[177],"ensemble,":[179],"multi-agent,":[180],"retrieval-augmented":[182],"designs":[183],"as":[184,214],"common":[185],"extensions.":[186],"Among":[187],"reporting":[189],"validation,":[191],"judges":[193],"often":[194],"show":[195],"moderate":[196],"strong":[198],"alignment":[199],"expert":[201],"judgments,":[202],"although":[203],"reliability":[204],"varies":[205],"substantially":[206],"tasks.":[208],"Overall,":[209],"this":[210],"positions":[212],"promising":[216],"framework":[217],"AI":[221],"while":[223],"emphasizing":[224],"that":[225],"value":[228],"depends":[229],"model":[231],"design":[232],"rigorous":[234],"validation.":[235]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-27T00:00:00"}
