{"id":"https://openalex.org/W4401863437","doi":"https://doi.org/10.1145/3637528.3671575","title":"Towards Automatic Evaluation for LLMs' Clinical Capabilities: Metric, Data, and Algorithm","display_name":"Towards Automatic Evaluation for LLMs' Clinical Capabilities: Metric, Data, and Algorithm","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401863437","doi":"https://doi.org/10.1145/3637528.3671575"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671575","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059565360","display_name":"Lei Liu","orcid":"https://orcid.org/0000-0001-8109-5248"},"institutions":[{"id":"https://openalex.org/I4210116924","display_name":"Chinese University of Hong Kong, Shenzhen","ror":"https://ror.org/02d5ks197","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633","https://openalex.org/I180726961","https://openalex.org/I4210116924"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Liu","raw_affiliation_strings":["The Chinese University of Hong Kong, Shenzhen, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Shenzhen, Shenzhen, China","institution_ids":["https://openalex.org/I4210116924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055996874","display_name":"Xiaoyan Yang","orcid":"https://orcid.org/0000-0001-7799-8460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoyan Yang","raw_affiliation_strings":["Ant Group, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100600844","display_name":"Fangzhou Li","orcid":"https://orcid.org/0000-0002-4999-4662"},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangzhou Li","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057863237","display_name":"Chenfei Chi","orcid":"https://orcid.org/0000-0003-4075-6147"},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenfei Chi","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007885672","display_name":"Yue Shen","orcid":"https://orcid.org/0000-0002-1046-9000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yue Shen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071894868","display_name":"Shiwei Lyu","orcid":"https://orcid.org/0000-0001-9493-0601"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiwei Lyu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100447228","display_name":"Ming Zhang","orcid":"https://orcid.org/0000-0002-1160-2635"},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Zhang","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007556918","display_name":"Xiaowei Ma","orcid":"https://orcid.org/0000-0002-0105-732X"},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaowei Ma","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087578815","display_name":"Xiangguo Lv","orcid":"https://orcid.org/0009-0003-5946-0351"},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangguo Lv","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100308149","display_name":"Liya Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liya Ma","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032099283","display_name":"Zhiqiang Zhang","orcid":"https://orcid.org/0000-0002-2321-7259"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiqiang Zhang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100408966","display_name":"Wei Xue","orcid":"https://orcid.org/0000-0003-0124-9433"},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Xue","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052533635","display_name":"Yiran Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I2800570007","display_name":"Renji Hospital","ror":"https://ror.org/03ypbx660","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I2800570007"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiran Huang","raw_affiliation_strings":["Renji Hospital, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Renji Hospital, Shanghai, China","institution_ids":["https://openalex.org/I2800570007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053242349","display_name":"Jinjie Gu","orcid":"https://orcid.org/0000-0001-7596-4945"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jinjie Gu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5059565360"],"corresponding_institution_ids":["https://openalex.org/I4210116924"],"apc_list":null,"apc_paid":null,"fwci":1.5648,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.84000502,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5466","last_page":"5475"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11894","display_name":"Radiology practices and education","score":0.9725000262260437,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9602000117301941,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5956676006317139},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5805627107620239},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.42812925577163696},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1044144332408905}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5956676006317139},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5805627107620239},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.42812925577163696},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1044144332408905},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671575","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2000191857","https://openalex.org/W2168490582","https://openalex.org/W2286778172","https://openalex.org/W2886281300","https://openalex.org/W4319460874","https://openalex.org/W4361289889","https://openalex.org/W4379508361","https://openalex.org/W4389520259"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2051487156","https://openalex.org/W2073681303","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4,124,181,229],"gaining":[5],"increasing":[6],"interests":[7],"to":[8,13,51,66,105,153,172,192,197,231],"improve":[9],"clinical":[10,24,73,95,102,108,195,249],"efficiency,":[11],"owing":[12],"their":[14],"unprecedented":[15],"performance":[16],"in":[17,71,182,212,248],"modelling":[18],"natural":[19],"language.":[20],"Ensuring":[21],"the":[22,26,36,68,107,121,127,139,142,155,175,200,213,233,236],"reliable":[23,246],"applications,":[25],"evaluation":[27,43,63,81,143,210],"of":[28,141,177,215,235],"LLMs":[29],"indeed":[30],"becomes":[31],"critical":[32],"for":[33,129,133,242],"better":[34],"mitigating":[35],"potential":[37],"risks,":[38],"e.g.,":[39,75],"hallucinations.":[40],"However,":[41],"current":[42],"methods":[44],"heavily":[45],"rely":[46],"on":[47],"labor-intensive":[48],"human":[49],"participation":[50],"achieve":[52],"human-preferred":[53],"judgements.":[54],"To":[55],"overcome":[56],"this":[57],"challenge,":[58],"we":[59,98,148,207],"propose":[60],"an":[61,209,224],"automatic":[62],"paradigm":[64,82,188],"tailored":[65],"assess":[67],"LLMs'":[69,201,243],"capabilities":[70,109],"delivering":[72],"services,":[74],"disease":[76],"diagnosis":[77],"and":[78,89,160,223,245],"treatment.":[79],"The":[80,186],"contains":[83],"three":[84],"basic":[85],"elements:":[86],"metric,":[87],"data,":[88],"algorithm.":[90],"Specifically,":[91],"inspired":[92],"by":[93],"professional":[94],"practice":[96],"pathways,":[97],"formulate":[99],"a":[100,111,150,161,168,178,218,220],"LLM-specific":[101],"pathway":[103],"(LCP)":[104],"define":[106],"that":[110],"doctor":[112,162,179],"agent":[113,180],"should":[114],"possess.":[115],"Then,":[116],"Standardized":[117],"Patients":[118],"(SPs)":[119],"from":[120],"medical":[122,131,202],"education":[123],"introduced":[125],"as":[126],"guideline":[128],"collecting":[130],"data":[132],"evaluation,":[134],"which":[135,164],"can":[136,189],"well":[137],"ensure":[138],"completeness":[140],"procedure.":[144],"Leveraging":[145],"these":[146],"steps,":[147],"develop":[149],"multi-agent":[151],"framework":[152],"simulate":[154],"interactive":[156],"environment":[157],"between":[158],"SPs":[159,221],"agent,":[163],"is":[165],"equipped":[166],"with":[167,184],"Retrieval-Augmented":[169],"Evaluation":[170],"(RAE)":[171],"determine":[173],"whether":[174],"behaviors":[176],"accordance":[183],"LCP.":[185],"above":[187],"be":[190],"extended":[191],"any":[193],"similar":[194],"scenarios":[196],"automatically":[198],"evaluate":[199],"capabilities.":[203],"Applying":[204],"such":[205],"paradigm,":[206],"construct":[208],"benchmark":[211],"field":[214],"urology,":[216],"including":[217],"LCP,":[219],"dataset,":[222],"automated":[225],"RAE.":[226],"Extensive":[227],"experiments":[228],"conducted":[230],"demonstrate":[232],"effectiveness":[234],"proposed":[237],"approach,":[238],"providing":[239],"more":[240],"insights":[241],"safe":[244],"deployments":[247],"practice.":[250]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
