{"id":"https://openalex.org/W7113905083","doi":"https://doi.org/10.1145/3765612.3767763","title":"PhenoGPT2: A Multimodal Fine-tuned Large Language Models for Phenotype Extraction and Normalization from Clinical Text and Facial Images","display_name":"PhenoGPT2: A Multimodal Fine-tuned Large Language Models for Phenotype Extraction and Normalization from Clinical Text and Facial Images","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W7113905083","doi":"https://doi.org/10.1145/3765612.3767763"},"language":null,"primary_location":{"id":"doi:10.1145/3765612.3767763","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3765612.3767763","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767763","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767763","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Quan Minh Nguyen","orcid":"https://orcid.org/0009-0005-4181-7943"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Quan Minh Nguyen","raw_affiliation_strings":["Bioengineering, University of Pennsylvania, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Bioengineering, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mian Umair Ahsan","orcid":"https://orcid.org/0000-0003-4725-2451"},"institutions":[{"id":"https://openalex.org/I1335321130","display_name":"Children's Hospital of Philadelphia","ror":"https://ror.org/01z7r7q48","country_code":"US","type":"funder","lineage":["https://openalex.org/I1335321130"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mian Umair Ahsan","raw_affiliation_strings":["Raymond G. Perelman Center for Cellular and Molecular Therapeutics, Children's Hospital of Philadelphia, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Raymond G. Perelman Center for Cellular and Molecular Therapeutics, Children's Hospital of Philadelphia, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I1335321130"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhanliang Wang","orcid":"https://orcid.org/0009-0003-7466-9946"},"institutions":[{"id":"https://openalex.org/I4210131439","display_name":"Applied Mathematics (United States)","ror":"https://ror.org/03seew607","country_code":"US","type":"company","lineage":["https://openalex.org/I4210131439"]},{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhanliang Wang","raw_affiliation_strings":["Applied Mathematics and Computational Science, University of Pennsylvania, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Applied Mathematics and Computational Science, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I4210131439","https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":null,"display_name":"Kai Wang","orcid":"https://orcid.org/0000-0002-5585-982X"},"institutions":[{"id":"https://openalex.org/I1335321130","display_name":"Children's Hospital of Philadelphia","ror":"https://ror.org/01z7r7q48","country_code":"US","type":"funder","lineage":["https://openalex.org/I1335321130"]},{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kai Wang","raw_affiliation_strings":["Pathology and Laboratory Medicine, University of Pennsylvania, Philadelphia, PA, USA","Raymond G. Perelman Center for Cellular and Molecular Therapeutics, Children's Hospital of Philadelphia, Philadelphia, PA, USA"],"affiliations":[{"raw_affiliation_string":"Pathology and Laboratory Medicine, University of Pennsylvania, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I79576946"]},{"raw_affiliation_string":"Raymond G. Perelman Center for Cellular and Molecular Therapeutics, Children's Hospital of Philadelphia, Philadelphia, PA, USA","institution_ids":["https://openalex.org/I1335321130"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I79576946"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62077438,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.553600013256073,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.553600013256073,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.2362000048160553,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.025599999353289604,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6263999938964844},{"id":"https://openalex.org/keywords/phenotype","display_name":"Phenotype","score":0.41620001196861267},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.40310001373291016},{"id":"https://openalex.org/keywords/clinical-phenotype","display_name":"Clinical phenotype","score":0.39089998602867126},{"id":"https://openalex.org/keywords/biomedical-text-mining","display_name":"Biomedical text mining","score":0.37439998984336853},{"id":"https://openalex.org/keywords/unified-medical-language-system","display_name":"Unified Medical Language System","score":0.3172000050544739},{"id":"https://openalex.org/keywords/negation","display_name":"Negation","score":0.30390000343322754},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.30140000581741333}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6367999911308289},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6263999938964844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5626999735832214},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5623000264167786},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.41620001196861267},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C3020646490","wikidata":"https://www.wikidata.org/wiki/Q25203551","display_name":"Clinical phenotype","level":4,"score":0.39089998602867126},{"id":"https://openalex.org/C165141518","wikidata":"https://www.wikidata.org/wiki/Q4915126","display_name":"Biomedical text mining","level":3,"score":0.37439998984336853},{"id":"https://openalex.org/C69505689","wikidata":"https://www.wikidata.org/wiki/Q455338","display_name":"Unified Medical Language System","level":2,"score":0.3172000050544739},{"id":"https://openalex.org/C2185349","wikidata":"https://www.wikidata.org/wiki/Q190558","display_name":"Negation","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2863999903202057},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.27309998869895935},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C66782513","wikidata":"https://www.wikidata.org/wiki/Q864601","display_name":"Biomedicine","level":2,"score":0.2603999972343445},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.25699999928474426},{"id":"https://openalex.org/C3019952477","wikidata":"https://www.wikidata.org/wiki/Q1324077","display_name":"Health records","level":3,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3765612.3767763","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3765612.3767763","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767763","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3765612.3767763","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3765612.3767763","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3765612.3767763","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7420286536216736,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7113905083.pdf","grobid_xml":"https://content.openalex.org/works/W7113905083.grobid-xml"},"referenced_works_count":3,"referenced_works":["https://openalex.org/W4282918066","https://openalex.org/W4385848332","https://openalex.org/W4388616071"],"related_works":[],"abstract_inverted_index":{"The":[0],"Human":[1],"Phenotype":[2],"Ontology":[3],"(HPO)1":[4],"and":[5,13,22,46,55,67,105,126,139],"PhenoPacket":[6],"schema2":[7],"are":[8,108],"increasingly":[9],"adopted":[10],"in":[11,30,135],"research":[12],"clinical":[14,39,57],"settings":[15],"to":[16],"describe":[17],"phenotypes":[18],"of":[19],"rare":[20,123],"diseases":[21],"prioritize":[23],"candidate":[24],"genes.":[25],"However,":[26],"phenotype":[27,78,87],"data":[28],"captured":[29],"Electronic":[31],"Health":[32],"Records":[33],"(EHRs)":[34],"often":[35],"exist":[36],"as":[37,73,122],"unstructured":[38],"texts,":[40],"rather":[41],"than":[42],"standardized":[43],"HPO":[44,100],"terms,":[45],"can":[47],"be":[48],"complicated":[49],"by":[50],"typos,":[51],"abbreviations,":[52],"synonyms,":[53],"negations":[54],"varied":[56],"descriptions.":[58],"These":[59,116],"issues":[60],"present":[61],"significant":[62],"challenges":[63,117],"for":[64,82,102],"reliable":[65],"extraction":[66],"normalization.":[68],"Furthermore,":[69],"media":[70],"types":[71],"such":[72,121],"facial":[74],"images":[75],"represent":[76],"valuable":[77],"information,":[79],"yet":[80],"methods":[81],"incorporating":[83],"them":[84],"into":[85],"structured":[86],"representations":[88],"remain":[89],"limited.":[90],"Compounding":[91],"these":[92,144],"challenges,":[93],"comprehensive":[94],"training":[95],"datasets":[96],"covering":[97],"the":[98],"complete":[99],"terms":[101],"both":[103],"text":[104],"vision":[106],"modalities":[107],"virtually":[109],"nonexistent,":[110],"making":[111],"model":[112],"development":[113],"particularly":[114],"difficult.":[115],"hinder":[118],"downstream":[119],"applications":[120],"disease":[124],"diagnosis":[125],"gene":[127],"prioritization.":[128],"Large":[129],"language":[130],"models":[131],"have":[132],"shown":[133],"promise":[134],"interpreting":[136],"complex":[137],"textual":[138],"image":[140],"data,":[141],"potentially":[142],"addressing":[143],"limitations.":[145]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-11T00:00:00"}
