{"id":"https://openalex.org/W7154028955","doi":"https://doi.org/10.1145/3772318.3791172","title":"Building Benchmarks from the Ground Up: Community-Centered Evaluation of LLMs in Healthcare Chatbot Settings","display_name":"Building Benchmarks from the Ground Up: Community-Centered Evaluation of LLMs in Healthcare Chatbot Settings","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154028955","doi":"https://doi.org/10.1145/3772318.3791172"},"language":null,"primary_location":{"id":"doi:10.1145/3772318.3791172","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772318.3791172","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3772318.3791172","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133486790","display_name":"Hamna Hamna","orcid":"https://orcid.org/0009-0008-9684-1192"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Hamna Hamna","raw_affiliation_strings":["Microsoft Research India, Microsoft Corporation, Bangalore, Karnataka, India"],"raw_orcid":"https://orcid.org/0009-0008-9684-1192","affiliations":[{"raw_affiliation_string":"Microsoft Research India, Microsoft Corporation, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085218914","display_name":"Gayatri Bhat","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gayatri Bhat","raw_affiliation_strings":["Karya, Bengaluru, India"],"raw_orcid":"https://orcid.org/0009-0009-4876-2171","affiliations":[{"raw_affiliation_string":"Karya, Bengaluru, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013013351","display_name":"Sourabrata Mukherjee","orcid":"https://orcid.org/0000-0002-1713-2769"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sourabrata Mukherjee","raw_affiliation_strings":["Microsoft Research, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0002-1713-2769","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066399723","display_name":"Faisal Lalani","orcid":"https://orcid.org/0000-0003-1209-8933"},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Faisal M. Lalani","raw_affiliation_strings":["Collective Intelligence Project, New York, New York, USA"],"raw_orcid":"https://orcid.org/0000-0003-1209-8933","affiliations":[{"raw_affiliation_string":"Collective Intelligence Project, New York, New York, USA","institution_ids":["https://openalex.org/I4210126580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126201334","display_name":"Evan Hadfield","orcid":null},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Evan Hadfield","raw_affiliation_strings":["Collective Intelligence Project, New York, New York, USA"],"raw_orcid":"https://orcid.org/0009-0002-8346-9148","affiliations":[{"raw_affiliation_string":"Collective Intelligence Project, New York, New York, USA","institution_ids":["https://openalex.org/I4210126580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020987927","display_name":"Divya Siddarth","orcid":"https://orcid.org/0009-0006-7073-6728"},"institutions":[{"id":"https://openalex.org/I4210126580","display_name":"RIKEN Center for Advanced Intelligence Project","ror":"https://ror.org/03ckxwf91","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210126580"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Divya Siddarth","raw_affiliation_strings":["Collective Intelligence Project, New York, New York, USA"],"raw_orcid":"https://orcid.org/0009-0006-7073-6728","affiliations":[{"raw_affiliation_string":"Collective Intelligence Project, New York, New York, USA","institution_ids":["https://openalex.org/I4210126580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013639574","display_name":"Kalika Bali","orcid":"https://orcid.org/0000-0001-9275-742X"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kalika Bali","raw_affiliation_strings":["Microsoft Research, Bangalore, India"],"raw_orcid":"https://orcid.org/0000-0001-9275-742X","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bangalore, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005513786","display_name":"Sunayana Sitaram","orcid":"https://orcid.org/0000-0003-4251-9719"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sunayana Sitaram","raw_affiliation_strings":["Microsoft Research, Bangalore, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0003-4251-9719","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5133486790"],"corresponding_institution_ids":["https://openalex.org/I4210124949"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.79425113,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.33160001039505005,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.33160001039505005,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.15929999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08990000188350677,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/chatbot","display_name":"Chatbot","score":0.842199981212616},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6603000164031982},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6331999897956848},{"id":"https://openalex.org/keywords/health-care","display_name":"Health care","score":0.5376999974250793},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.40470001101493835},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.34860000014305115},{"id":"https://openalex.org/keywords/ehealth","display_name":"eHealth","score":0.3255999982357025}],"concepts":[{"id":"https://openalex.org/C2779041454","wikidata":"https://www.wikidata.org/wiki/Q870780","display_name":"Chatbot","level":2,"score":0.842199981212616},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6603000164031982},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6331999897956848},{"id":"https://openalex.org/C160735492","wikidata":"https://www.wikidata.org/wiki/Q31207","display_name":"Health care","level":2,"score":0.5376999974250793},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5232999920845032},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4668999910354614},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.4577000141143799},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.40470001101493835},{"id":"https://openalex.org/C39549134","wikidata":"https://www.wikidata.org/wiki/Q133080","display_name":"Public relations","level":1,"score":0.396699994802475},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.34860000014305115},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.34700000286102295},{"id":"https://openalex.org/C202645933","wikidata":"https://www.wikidata.org/wiki/Q4930","display_name":"eHealth","level":3,"score":0.3255999982357025},{"id":"https://openalex.org/C2777877512","wikidata":"https://www.wikidata.org/wiki/Q1116097","display_name":"Common ground","level":2,"score":0.31299999356269836},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.30880001187324524},{"id":"https://openalex.org/C2775951005","wikidata":"https://www.wikidata.org/wiki/Q3473024","display_name":"Community health","level":3,"score":0.27070000767707825},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.26409998536109924},{"id":"https://openalex.org/C145642194","wikidata":"https://www.wikidata.org/wiki/Q870895","display_name":"Health informatics","level":3,"score":0.2615000009536743},{"id":"https://openalex.org/C191630685","wikidata":"https://www.wikidata.org/wiki/Q4027615","display_name":"Informatics","level":2,"score":0.2590999901294708},{"id":"https://openalex.org/C2776193868","wikidata":"https://www.wikidata.org/wiki/Q5154950","display_name":"Community engagement","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C169536714","wikidata":"https://www.wikidata.org/wiki/Q1666159","display_name":"Cultural competence","level":2,"score":0.25690001249313354},{"id":"https://openalex.org/C156325361","wikidata":"https://www.wikidata.org/wiki/Q1152864","display_name":"Grounded theory","level":3,"score":0.25600001215934753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772318.3791172","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772318.3791172","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3772318.3791172","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772318.3791172","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8102224469184875,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W2754799758","https://openalex.org/W2883495931","https://openalex.org/W2911227954","https://openalex.org/W2912986837","https://openalex.org/W2923014074","https://openalex.org/W3089813497","https://openalex.org/W3099919888","https://openalex.org/W3101223450","https://openalex.org/W3186799149","https://openalex.org/W4296405185","https://openalex.org/W4309619003","https://openalex.org/W4384662964","https://openalex.org/W4385571411","https://openalex.org/W4386981810","https://openalex.org/W4388007985","https://openalex.org/W4389403907","https://openalex.org/W4389519254","https://openalex.org/W4389524534","https://openalex.org/W4390976599","https://openalex.org/W4391136507","https://openalex.org/W4392120502","https://openalex.org/W4392637261","https://openalex.org/W4392939433","https://openalex.org/W4396832979","https://openalex.org/W4399365301","https://openalex.org/W4401042427","https://openalex.org/W4402671569","https://openalex.org/W4405234363","https://openalex.org/W4406241919","https://openalex.org/W4409280355","https://openalex.org/W4409736067","https://openalex.org/W4409736150","https://openalex.org/W4410946133","https://openalex.org/W4411120224","https://openalex.org/W4411550451","https://openalex.org/W4412444947","https://openalex.org/W4412886773","https://openalex.org/W4412889941","https://openalex.org/W4412944755","https://openalex.org/W4415797205","https://openalex.org/W4415797368"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4,94],"typically":[5],"evaluated":[6],"through":[7,72],"general":[8],"or":[9,36],"domain-specific":[10],"benchmarks":[11],"testing":[12],"capabilities":[13],"that":[14,32],"often":[15],"lack":[16],"grounding":[17],"in":[18,78,100,104],"the":[19,41,87,101],"lived":[20],"realities":[21],"of":[22,49],"end":[23],"users.":[24],"Critical":[25],"domains":[26],"such":[27],"as":[28],"healthcare":[29],"require":[30],"evaluations":[31],"extend":[33],"beyond":[34],"artificial":[35],"simulated":[37],"tasks":[38],"to":[39,84],"reflect":[40],"everyday":[42],"needs,":[43],"cultural":[44],"practices,":[45],"and":[46,63,91,127],"nuanced":[47,114],"contexts":[48],"communities.":[50],"We":[51,96],"propose":[52],"Samiksha,":[53],"a":[54,73,121],"community-driven":[55,76],"evaluation":[56],"pipeline":[57,77],"co-created":[58],"with":[59],"civil-society":[60],"organizations":[61],"(CSOs)":[62],"community":[64,80,115],"members.":[65],"Our":[66,106],"approach":[67,99],"enables":[68],"scalable,":[69],"automated":[70],"benchmarking":[71],"culturally":[74],"aware,":[75],"which":[79],"feedback":[81],"informs":[82],"what":[83],"evaluate,":[85],"how":[86,92,109],"benchmark":[88],"is":[89],"built,":[90],"outputs":[93],"scored.":[95],"demonstrate":[97],"this":[98],"health":[102,116],"domain":[103],"India.":[105],"analysis":[107],"highlights":[108],"current":[110],"multilingual":[111],"LLMs":[112],"address":[113],"queries,":[117],"while":[118],"also":[119],"offering":[120],"scalable":[122],"pathway":[123],"for":[124],"contextually":[125],"grounded":[126],"inclusive":[128],"LLM":[129],"evaluation.":[130]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-14T00:00:00"}
