{"id":"https://openalex.org/W4416249866","doi":"https://doi.org/10.1109/ijcnn64981.2025.11229228","title":"Knowledge Probing on Decoder-Only Models in Medical Domain","display_name":"Knowledge Probing on Decoder-Only Models in Medical Domain","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416249866","doi":"https://doi.org/10.1109/ijcnn64981.2025.11229228"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11229228","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11229228","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101614194","display_name":"Ting Zeng","orcid":"https://orcid.org/0000-0002-5217-4147"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Zeng Ting","raw_affiliation_strings":["University of Malaya,Faulty of Computer Science and Information Technology,Kuala Lumpur,Malaysia"],"affiliations":[{"raw_affiliation_string":"University of Malaya,Faulty of Computer Science and Information Technology,Kuala Lumpur,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063587446","display_name":"Chu Kiong Loo","orcid":"https://orcid.org/0000-0001-7867-2665"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Chu Kiong Loo","raw_affiliation_strings":["University of Malaya,Faulty of Computer Science and Information Technology,Kuala Lumpur,Malaysia"],"affiliations":[{"raw_affiliation_string":"University of Malaya,Faulty of Computer Science and Information Technology,Kuala Lumpur,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068359573","display_name":"Nurul Japar","orcid":"https://orcid.org/0000-0002-3054-1874"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Nurul Binti Japar","raw_affiliation_strings":["University of Malaya,Faulty of Computer Science and Information Technology,Kuala Lumpur,Malaysia"],"affiliations":[{"raw_affiliation_string":"University of Malaya,Faulty of Computer Science and Information Technology,Kuala Lumpur,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101614194"],"corresponding_institution_ids":["https://openalex.org/I33849332"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19396435,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3686999976634979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3686999976634979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.2159000039100647,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.15520000457763672,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6644999980926514},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6542999744415283},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5694000124931335},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5648999810218811},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.4794999957084656},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4731999933719635}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6699000000953674},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6644999980926514},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6542999744415283},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5694000124931335},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5648999810218811},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.4794999957084656},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4731999933719635},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4690999984741211},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.45969998836517334},{"id":"https://openalex.org/C2985722590","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medical knowledge","level":2,"score":0.45829999446868896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4009999930858612},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3353999853134155},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.31839999556541443},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29179999232292175},{"id":"https://openalex.org/C534262118","wikidata":"https://www.wikidata.org/wiki/Q177719","display_name":"Medical diagnosis","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.28189998865127563},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.25619998574256897}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11229228","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11229228","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2971096647","https://openalex.org/W3115947671","https://openalex.org/W3164540570","https://openalex.org/W3205810519","https://openalex.org/W4291371203","https://openalex.org/W4390490761","https://openalex.org/W4402351120","https://openalex.org/W4402352122","https://openalex.org/W4402352920","https://openalex.org/W4404485589","https://openalex.org/W4411113361","https://openalex.org/W4412886643"],"related_works":[],"abstract_inverted_index":{"Knowledge":[0],"probing":[1,28,50,86,173,203],"plays":[2],"a":[3,84,208],"crucial":[4],"role":[5],"in":[6,21,125,151,180,223,233],"evaluating":[7],"the":[8,34,67,99,132,146,168,198,229,234],"factual":[9],"knowledge":[10,49,154,172,194,202],"retention":[11],"and":[12,58,91,103,138,175,204,211],"reasoning":[13,60,128],"capabilities":[14],"of":[15,36,101,148,170,200,231],"large":[16],"language":[17],"models":[18,105,120],"(LLMs),":[19],"particularly":[20,124],"high-risk":[22],"biomedical":[23,27,48,153,171,201,235],"applications.":[24],"However,":[25,130],"existing":[26],"benchmarks":[29],"often":[30],"fail":[31],"to":[32,113,206,214],"capture":[33],"complexity":[35],"real-world":[37,224],"medical":[38,193,225],"reasoning.":[39],"In":[40],"this":[41],"study,":[42],"we":[43,82],"introduce":[44],"MedLAMA2,":[45],"an":[46],"advanced":[47],"benchmark":[51,210],"built":[52],"upon":[53],"MedLAMA,":[54],"incorporating":[55],"multi-token":[56],"entities":[57],"two-hop":[59],"queries.":[61],"All":[62],"vocabularies":[63],"are":[64],"sourced":[65],"from":[66,111],"Unified":[68],"Medical":[69],"Language":[70],"System":[71],"(UMLS)":[72],"Metathesaurus**,":[73],"ensuring":[74],"domain-specific":[75],"accuracy.To":[76],"systematically":[77],"evaluate":[78],"LLMs":[79,150,232],"without":[80],"fine-tuning,":[81],"design":[83],"structured":[85,152,192],"pipeline":[87],"using":[88],"Top-k":[89],"Metric":[90,93],"Token-level":[92],"as":[94,136],"evaluation":[95,212],"parameters.":[96],"We":[97],"assess":[98],"performance":[100,179,222],"encoder-only":[102,122],"decoder-only":[104,119],"across":[106],"different":[107],"parameter":[108],"scales,":[109],"ranging":[110],"6B":[112],"72B.":[114],"Our":[115],"experiments":[116],"demonstrate":[117],"that":[118,160,185],"outperform":[121],"models,":[123,134],"complex":[126],"multi-hop":[127,181],"tasks.":[129],"even":[131,177],"best-performing":[133],"such":[135],"Llama3-70B-Instruct":[137],"Qwen-1.5-72B-Chat,":[139],"achieve":[140],"acc@10":[141],"scores":[142],"below":[143],"40%,":[144],"highlighting":[145],"limitations":[147],"current":[149],"retrieval.":[155],"Additionally,":[156],"our":[157],"results":[158],"show":[159],"Chain-of-Thought":[161],"(CoT)":[162],"prompting":[163],"does":[164],"not":[165,188],"significantly":[166],"improve":[167],"accuracy":[169],"tasks":[174],"may":[176,187],"degrade":[178],"settings.":[182],"This":[183],"suggests":[184],"CoT":[186],"be":[189],"well-suited":[190],"for":[191,219],"retrieval.This":[195],"study":[196],"highlights":[197],"challenges":[199],"aims":[205],"construct":[207],"new":[209],"process":[213],"provide":[215],"more":[216],"effective":[217],"methods":[218],"assessing":[220],"LLM":[221],"environments,":[226],"ultimately":[227],"promoting":[228],"application":[230],"domain.":[236]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
