{"id":"https://openalex.org/W7113900330","doi":"https://doi.org/10.1145/3765612.3767224","title":"GamELY: Human-in-the loop Framework for Scaling Human Evaluation of LLMs in Healthcare","display_name":"GamELY: Human-in-the loop Framework for Scaling Human Evaluation of LLMs in Healthcare","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W7113900330","doi":"https://doi.org/10.1145/3765612.3767224"},"language":null,"primary_location":{"id":"doi:10.1145/3765612.3767224","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3765612.3767224","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Raghav Awasthi","orcid":"https://orcid.org/0000-0002-6643-4333"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Raghav Awasthi","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nishant Singh","orcid":"https://orcid.org/0009-0003-3692-7254"},"institutions":[{"id":"https://openalex.org/I1314596251","display_name":"Allen Institute for Brain Science","ror":"https://ror.org/00dcv1019","country_code":"US","type":"facility","lineage":["https://openalex.org/I1314596251","https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nishant Singh","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Boston, MA, USA","institution_ids":["https://openalex.org/I1314596251"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Shreya Mishra","orcid":"https://orcid.org/0009-0000-1052-5546"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shreya Mishra","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Atharva Bhattad","orcid":"https://orcid.org/0009-0001-8041-9105"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Atharva Bhattad","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, Ohio, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Moises Auron","orcid":"https://orcid.org/0000-0001-6398-6047"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Moises Auron","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Charumathi Raghu Subramanian","orcid":"https://orcid.org/0000-0003-1283-0514"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Charumathi Raghu Subramanian","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, Ohio, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, Ohio, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ashish Atreja","orcid":"https://orcid.org/0009-0000-8170-2861"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashish Atreja","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kamal Maheshwari","orcid":"https://orcid.org/0000-0003-1961-5577"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kamal Maheshwari","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dwarikanath Mahapatra","orcid":"https://orcid.org/0000-0001-9749-7858"},"institutions":[{"id":"https://openalex.org/I1314596251","display_name":"Allen Institute for Brain Science","ror":"https://ror.org/00dcv1019","country_code":"US","type":"facility","lineage":["https://openalex.org/I1314596251","https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dwarikanath Mahapatra","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Abu Dhabi, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Abu Dhabi, USA","institution_ids":["https://openalex.org/I1314596251"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jacek B. Cywinski","orcid":"https://orcid.org/0000-0001-6283-3049"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jacek B. Cywinski","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ashish Khanna","orcid":"https://orcid.org/0000-0002-9083-891X"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ashish Khanna","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Francis Papay","orcid":"https://orcid.org/0000-0003-0455-2892"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Francis Papay","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]},{"author_position":"last","author":{"id":null,"display_name":"Piyush Mathur","orcid":"https://orcid.org/0000-0003-3777-8767"},"institutions":[{"id":"https://openalex.org/I4210147129","display_name":"BrainAid (United States)","ror":"https://ror.org/04k00np26","country_code":"US","type":"company","lineage":["https://openalex.org/I4210147129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Piyush Mathur","raw_affiliation_strings":["BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA"],"affiliations":[{"raw_affiliation_string":"BrainXAI ReSearch, BrainX,LLC., Cleveland, OH, USA","institution_ids":["https://openalex.org/I4210147129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":13,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210147129"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.80145865,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.7200999855995178,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.7200999855995178,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.11410000175237656,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.02239999920129776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6154999732971191},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.5058000087738037},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4814000129699707},{"id":"https://openalex.org/keywords/human-health","display_name":"Human health","score":0.42739999294281006},{"id":"https://openalex.org/keywords/risk-assessment","display_name":"Risk assessment","score":0.3693000078201294},{"id":"https://openalex.org/keywords/gold-standard","display_name":"Gold standard (test)","score":0.36230000853538513},{"id":"https://openalex.org/keywords/human-in-the-loop","display_name":"Human-in-the-loop","score":0.36059999465942383}],"concepts":[{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6154999732971191},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5903000235557556},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.5058000087738037},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.487199991941452},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4814000129699707},{"id":"https://openalex.org/C2987857752","wikidata":"https://www.wikidata.org/wiki/Q12147","display_name":"Human health","level":2,"score":0.42739999294281006},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.3693000078201294},{"id":"https://openalex.org/C40993552","wikidata":"https://www.wikidata.org/wiki/Q514654","display_name":"Gold standard (test)","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C2780626000","wikidata":"https://www.wikidata.org/wiki/Q5936775","display_name":"Human-in-the-loop","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3287000060081482},{"id":"https://openalex.org/C44280652","wikidata":"https://www.wikidata.org/wiki/Q104837","display_name":"Phase (matter)","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C169437150","wikidata":"https://www.wikidata.org/wiki/Q8458","display_name":"Human rights","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2773999869823456},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2770000100135803},{"id":"https://openalex.org/C3018395757","wikidata":"https://www.wikidata.org/wiki/Q1379672","display_name":"Evaluation methods","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2703000009059906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2587999999523163},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3765612.3767224","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3765612.3767224","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.535047173500061,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1442829153","https://openalex.org/W2113462162","https://openalex.org/W2740924709","https://openalex.org/W2895486342","https://openalex.org/W3172996796","https://openalex.org/W3192495122","https://openalex.org/W3194710570","https://openalex.org/W4231576061","https://openalex.org/W4390266803","https://openalex.org/W4402909345","https://openalex.org/W4408220858"],"related_works":[],"abstract_inverted_index":{"Human":[0],"evaluation":[1,42,68,90,190],"is":[2,48],"considered":[3],"the":[4,53,79,102,112,142],"gold":[5],"standard":[6],"for":[7,39,88,105,144,180],"evaluating":[8],"large":[9],"language":[10],"models":[11],"(LLMs)":[12],"performance":[13,138],"in":[14,45,52,94],"healthcare.":[15,46],"However,":[16],"it":[17,126],"presents":[18],"significant":[19],"challenges,":[20],"being":[21],"resource-intensive,":[22],"time-consuming,":[23],"and":[24,74,84,154,157,177,187],"difficult":[25],"to":[26,60,91,109,120,172],"scale.":[27],"To":[28,147],"address":[29],"these":[30,149],"limitations,":[31],"we":[32,151],"propose":[33],"GamELY,":[34],"a":[35,49,121],"Human-in-the-Loop":[36],"(HITL)":[37],"framework":[38],"scalable":[40,186],"human":[41,67,89,131,167,189],"of":[43,101,191],"LLMs":[44,64,192],"GamELY":[47,184],"two-phase":[50],"system:":[51],"first":[54],"phase,":[55],"well-structured":[56],"prompts":[57],"are":[58],"used":[59,87],"generate":[61],"scores":[62],"from":[63],"across":[65,140],"diverse":[66],"metrics":[69,134],"(Relevance,":[70],"Coverage,":[71],"Coherence,":[72],"Harm,":[73,136],"Comparison).":[75],"The":[76,96],"prompt":[77],"uses":[78],"same":[80],"metrics,":[81],"assessment":[82],"questions":[83],"scale":[85],"as":[86,111],"ensure":[92],"uniformity":[93],"evaluation.":[95],"second":[97],"phase":[98],"involves":[99],"subsampling":[100,156],"full":[103],"dataset":[104],"auditing,":[106],"enabling":[107],"humans":[108],"serve":[110],"final":[113],"judges":[114],"while":[115],"minimizing":[116],"their":[117],"workload.":[118],"Applied":[119],"preventive":[122],"healthcare":[123],"QA":[124],"dataset,":[125],"showed":[127,163],"84%":[128],"agreement":[129,165],"with":[130,166,193],"evaluations":[132,168],"on":[133],"like":[135],"though":[137],"varied":[139],"others\u2014highlighting":[141],"need":[143],"HITL":[145],"auditing.":[146,182],"guide":[148],"audits,":[150],"compared":[152],"random":[153],"anomaly-based":[155,160],"found":[158],"that":[159],"samples":[161],"typically":[162],"lower":[164],"(e.g.,":[169],"Accuracy:":[170],"61%":[171],"40%),":[173],"indicating":[174],"reduced":[175],"consensus":[176],"greater":[178],"potential":[179],"targeted":[181],"Overall,":[183],"offers":[185],"efficient":[188],"simple":[194],"deployment.":[195]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-11T00:00:00"}
