{"id":"https://openalex.org/W7117460941","doi":"https://doi.org/10.1109/dicta68720.2025.11302446","title":"MedGemma-Critic: Fine-Tuning Medical Language Models for Domain-Specialised Text Evaluation","display_name":"MedGemma-Critic: Fine-Tuning Medical Language Models for Domain-Specialised Text Evaluation","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7117460941","doi":"https://doi.org/10.1109/dicta68720.2025.11302446"},"language":null,"primary_location":{"id":"doi:10.1109/dicta68720.2025.11302446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302446","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121465265","display_name":"Sirui Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Sirui Liu","raw_affiliation_strings":["School of Computer Science, The University of Sydney,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney,Sydney,Australia","institution_ids":["https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100614823","display_name":"Jinman Kim","orcid":"https://orcid.org/0000-0002-3647-7826"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Jinman Kim","raw_affiliation_strings":["School of Computer Science, The University of Sydney,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, The University of Sydney,Sydney,Australia","institution_ids":["https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5121465265"],"corresponding_institution_ids":["https://openalex.org/I129604602"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61446765,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.4068000018596649,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.4068000018596649,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.35010001063346863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.05550000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.7069000005722046},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5455999970436096},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4952999949455261},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43050000071525574},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.34220001101493835},{"id":"https://openalex.org/keywords/unified-medical-language-system","display_name":"Unified Medical Language System","score":0.33090001344680786},{"id":"https://openalex.org/keywords/medical-knowledge","display_name":"Medical knowledge","score":0.3190999925136566},{"id":"https://openalex.org/keywords/medical-practice","display_name":"Medical practice","score":0.30140000581741333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7268000245094299},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.7069000005722046},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6000000238418579},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5455999970436096},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4952999949455261},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48260000348091125},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43050000071525574},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3864000141620636},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.34220001101493835},{"id":"https://openalex.org/C69505689","wikidata":"https://www.wikidata.org/wiki/Q455338","display_name":"Unified Medical Language System","level":2,"score":0.33090001344680786},{"id":"https://openalex.org/C2985722590","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medical knowledge","level":2,"score":0.3190999925136566},{"id":"https://openalex.org/C2993312423","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medical practice","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C2983241795","wikidata":"https://www.wikidata.org/wiki/Q6806500","display_name":"Medical decision making","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2856999933719635},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C59594135","wikidata":"https://www.wikidata.org/wiki/Q5249242","display_name":"Decision model","level":2,"score":0.2572000026702881},{"id":"https://openalex.org/C2779231881","wikidata":"https://www.wikidata.org/wiki/Q5977147","display_name":"Medical literature","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dicta68720.2025.11302446","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302446","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.49381446838378906}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2901466771","https://openalex.org/W2911489562","https://openalex.org/W2995225687","https://openalex.org/W4381587418","https://openalex.org/W4390873312","https://openalex.org/W4402670365","https://openalex.org/W4402727764","https://openalex.org/W4408226143"],"related_works":[],"abstract_inverted_index":{"Evaluating":[0],"medical":[1,42,51,62,192],"AI-generated":[2],"text":[3,52],"remains":[4],"challenging":[5],"due":[6],"to":[7,114,136],"the":[8,15,91,121,128,160],"lack":[9,40],"of":[10,18,93],"domain-specific":[11],"evaluation":[12,81,189],"models":[13,38],"and":[14,72,75,87,105,111,118,130,176,185],"limited":[16],"availability":[17],"clinician":[19],"annotations.":[20],"Existing":[21],"approaches":[22],"typically":[23],"rely":[24],"either":[25],"on":[26,34,150],"overlap-based":[27],"metrics":[28],"that":[29,39,54,68,102],"reward":[30],"surface":[31],"alignment":[32],"or":[33,141],"general-purpose,":[35],"all-domain":[36],"critic":[37,48,67,135,161,184],"specialised":[41,183],"knowledge.":[43],"We":[44],"present":[45],"a":[46,60,66,78,98,134,138,146,165,173,182,186],"domain-specialised":[47],"model":[49,64,162],"for":[50,191],"generation":[53],"addresses":[55],"two":[56],"gaps:":[57],"(i)":[58],"transforming":[59],"generative":[61],"language":[63],"into":[65],"delivers":[69],"both":[70,172],"scores":[71],"pairwise":[73,142],"preferences,":[74],"(ii)":[76],"creating":[77],"clinically":[79],"annotated":[80],"dataset":[82,154],"aligned":[83],"with":[84,108,145],"criteria\u2014accuracy,":[85],"completeness,":[86],"readability.":[88],"To":[89],"mitigate":[90],"scarcity":[92],"expert":[94],"labels,":[95],"we":[96,124],"introduce":[97],"multi-agent":[99,158],"data-curation":[100],"pipeline":[101],"combines":[103],"general-":[104],"medical-domain":[106],"judges":[107],"template-controlled":[109],"prompts":[110],"agreement":[112],"filters":[113],"produce":[115],"consistent":[116],"labels":[117],"rationales.":[119],"On":[120],"modelling":[122],"side,":[123],"adopt":[125],"MedGemma-4B":[126],"as":[127,133],"backbone":[129,175],"train":[131],"it":[132],"emit":[137],"decision":[139,168],"(score":[140],"preference)":[143],"together":[144],"brief":[147],"rationale.":[148],"Trained":[149],"an":[151],"approximately":[152],"3k":[153],"curated":[155],"by":[156],"our":[157],"pipeline,":[159],"MedGemma-Critic":[163],"achieves":[164],"higher":[166],"overall":[167],"match":[169],"rate":[170],"than":[171],"zero-shot":[174],"generalpurpose":[177],"critics.":[178],"This":[179],"work":[180],"provides":[181],"reusable":[187],"clinical":[188],"framework":[190],"AI.":[193]},"counts_by_year":[],"updated_date":"2025-12-30T23:08:21.542490","created_date":"2025-12-29T00:00:00"}
