{"id":"https://openalex.org/W4416410080","doi":"https://doi.org/10.48550/arxiv.2507.19315","title":"AutoPCR: Automated Phenotype Concept Recognition by Prompting","display_name":"AutoPCR: Automated Phenotype Concept Recognition by Prompting","publication_year":2025,"publication_date":"2025-07-25","ids":{"openalex":"https://openalex.org/W4416410080","doi":"https://doi.org/10.48550/arxiv.2507.19315"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2507.19315","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.19315","pdf_url":"https://arxiv.org/pdf/2507.19315","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.19315","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074833951","display_name":"Yicheng Tao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Tao, Yicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044748400","display_name":"Yuanhao Huang","orcid":"https://orcid.org/0000-0001-8190-0390"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yuanhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, Yiqun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yiqun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Luo, Xin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Liu, Jie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jie","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5074833951"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.90420001745224,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.90420001745224,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.05350000038743019,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.011099999770522118,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6564000248908997},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6298999786376953},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5098999738693237},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38929998874664307},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.37380000948905945},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.37220001220703125},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3716999888420105},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.36329999566078186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7092999815940857},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6564000248908997},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6298999786376953},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6205000281333923},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5141000151634216},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5098999738693237},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47350001335144043},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38929998874664307},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.37380000948905945},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.37220001220703125},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3716999888420105},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.36329999566078186},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.32510000467300415},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3158999979496002},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3052999973297119},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.3034999966621399},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.28769999742507935},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.2872999906539917},{"id":"https://openalex.org/C2777220311","wikidata":"https://www.wikidata.org/wiki/Q6423340","display_name":"Knowledge acquisition","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.260699987411499}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2507.19315","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.19315","pdf_url":"https://arxiv.org/pdf/2507.19315","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2507.19315","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2507.19315","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.19315","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.19315","pdf_url":"https://arxiv.org/pdf/2507.19315","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Motivation:":[0],"Phenotype":[1],"concept":[2],"recognition":[3],"(CR)":[4],"is":[5,117],"a":[6,54],"fundamental":[7],"task":[8],"in":[9],"biomedical":[10,31],"text":[11,27],"mining.":[12],"However,":[13],"existing":[14],"methods":[15],"either":[16],"require":[17],"ontology-specific":[18,70],"training,":[19],"making":[20],"them":[21],"struggle":[22],"to":[23,60,63,109],"generalize":[24,62],"across":[25,96],"diverse":[26],"styles":[28],"and":[29,66,92,100,107,113],"evolving":[30],"terminology,":[32],"or":[33],"depend":[34],"on":[35],"general-purpose":[36],"large":[37],"language":[38],"models":[39],"(LLMs)":[40],"that":[41,86],"lack":[42],"necessary":[43],"domain":[44],"knowledge.":[45],"Results:":[46],"To":[47,72],"address":[48],"these":[49],"limitations,":[50],"we":[51,76],"propose":[52],"AutoPCR,":[53],"prompt-based":[55],"phenotype":[56],"CR":[57],"method":[58],"designed":[59],"automatically":[61],"new":[64,110],"ontologies":[65],"unseen":[67],"data":[68],"without":[69],"training.":[71],"further":[73],"boost":[74],"performance,":[75],"also":[77],"introduce":[78],"an":[79],"optional":[80],"self-supervised":[81],"training":[82],"strategy.":[83],"Experiments":[84],"show":[85],"AutoPCR":[87],"achieves":[88],"the":[89],"best":[90],"average":[91],"most":[93],"robust":[94],"performance":[95],"datasets.":[97],"Further":[98],"ablation":[99],"transfer":[101],"studies":[102],"demonstrate":[103],"its":[104],"inductive":[105],"capability":[106],"generalizability":[108],"ontologies.":[111],"Availability":[112],"Implementation:":[114],"Our":[115],"code":[116],"available":[118],"at":[119],"https://github.com/yctao7/AutoPCR.":[120],"Contact:":[121],"drjieliu@umich.edu":[122]},"counts_by_year":[],"updated_date":"2026-04-07T06:01:17.266235","created_date":"2025-10-10T00:00:00"}
