{"id":"https://openalex.org/W3216046298","doi":"https://doi.org/10.5281/zenodo.3949820","title":"PheneBank: Processed Medline Abstracts and PMC full articles + Phenotype-Disease Associations","display_name":"PheneBank: Processed Medline Abstracts and PMC full articles + Phenotype-Disease Associations","publication_year":2020,"publication_date":"2020-07-18","ids":{"openalex":"https://openalex.org/W3216046298","doi":"https://doi.org/10.5281/zenodo.3949820","mag":"3216046298"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:3949820","is_oa":true,"landing_page_url":"https://zenodo.org/record/3949820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/3949820","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110555562","display_name":"Mohammad Taher Pilehvar","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Pilehvar, Mohammad Taher","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112708286","display_name":"Nigel Collier","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Collier, Nigel","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080121437","display_name":"Adam S. Bernard","orcid":"https://orcid.org/0000-0003-2699-5168"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bernard, Adam S.","raw_affiliation_strings":["Queen Mary University"],"affiliations":[{"raw_affiliation_string":"Queen Mary University","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009950897","display_name":"Damian Smedley","orcid":"https://orcid.org/0000-0002-5836-9850"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smedley, Damian","raw_affiliation_strings":["Queen Mary University"],"affiliations":[{"raw_affiliation_string":"Queen Mary University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110555562"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13656","display_name":"Science, Research, and Medicine","score":0.19370000064373016,"subfield":{"id":"https://openalex.org/subfields/2743","display_name":"Reproductive Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T13656","display_name":"Science, Research, and Medicine","score":0.19370000064373016,"subfield":{"id":"https://openalex.org/subfields/2743","display_name":"Reproductive Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/phenotype","display_name":"Phenotype","score":0.5963612794876099},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.5646175742149353},{"id":"https://openalex.org/keywords/clinical-phenotype","display_name":"Clinical phenotype","score":0.5365602374076843},{"id":"https://openalex.org/keywords/disease","display_name":"Disease","score":0.43266913294792175},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.3818046748638153},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3505901098251343},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.25282931327819824},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.18999981880187988},{"id":"https://openalex.org/keywords/internal-medicine","display_name":"Internal medicine","score":0.13539499044418335},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.06404775381088257}],"concepts":[{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.5963612794876099},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.5646175742149353},{"id":"https://openalex.org/C3020646490","wikidata":"https://www.wikidata.org/wiki/Q25203551","display_name":"Clinical phenotype","level":4,"score":0.5365602374076843},{"id":"https://openalex.org/C2779134260","wikidata":"https://www.wikidata.org/wiki/Q12136","display_name":"Disease","level":2,"score":0.43266913294792175},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3818046748638153},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3505901098251343},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.25282931327819824},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.18999981880187988},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.13539499044418335},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.06404775381088257},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:3949820","is_oa":true,"landing_page_url":"https://zenodo.org/record/3949820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.3949820","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.3949820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:3949820","is_oa":true,"landing_page_url":"https://zenodo.org/record/3949820","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Zero hunger","id":"https://metadata.un.org/sdg/2"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3197267427","https://openalex.org/W3164771895","https://openalex.org/W2604464132","https://openalex.org/W2161088626","https://openalex.org/W3029893540","https://openalex.org/W2314682585","https://openalex.org/W2186179281","https://openalex.org/W2313584493","https://openalex.org/W1995787398","https://openalex.org/W2135667114"],"abstract_inverted_index":{"<strong>The":[0,260],"PheneBank":[1,114,267],"project:</strong>":[2],"Free":[3],"text":[4,74,473],"scientific":[5],"literature":[6,129],"has":[7,82,366,439],"the":[8,20,50,54,73,102,108,128,168,187,197,223,228,233,250,256,266,271,321,341,347,352,357,429,446,451,468,478,506,512,526,539,549],"potential":[9],"to":[10,92,117,131,144,172,181,307,346,386,531,548,552],"be":[11,380],"an":[12,179,263],"incredibly":[13],"valuable":[14],"source":[15],"of":[16,58,63,89,167,194,212,244,249,265,273,290,392,477],"data":[17,378],"for":[18,52,96,147,382,467,490,538],"uncovering":[19],"often":[21],"hidden":[22],"relationships":[23],"between":[24,152,189],"genes,":[25],"diseases":[26,154],"and":[27,37,43,56,107,119,130,155,191,232,300,315,394,427,433,482,500],"phenotypes.":[28,195],"Phenotypic":[29],"descriptions":[30,48],"cover":[31],"abnormalities":[32],"in":[33,332,396,505],"anatomical":[34],"structures,":[35],"processes":[36],"behaviours.":[38],"For":[39],"example":[40],"'growth":[41],"delay'":[42],"'body":[44],"weight":[45],"loss'.":[46],"Such":[47],"form":[49],"basis":[51],"determining":[53],"existence":[55],"treatment":[57],"a":[59,86,204,370,397,423,488],"disease":[60],"but,":[61],"because":[62],"their":[64,532],"inherent":[65],"complexity,":[66],"have":[67,159,304,453],"previously":[68],"received":[69],"less":[70],"attention":[71],"by":[72,85,185,227],"mining":[75],"community.":[76],"In":[77,317],"recent":[78],"years,":[79],"significant":[80,150],"effort":[81],"been":[83,305,367,454],"spent":[84],"small":[87],"number":[88],"expert":[90],"curators":[91],"create":[93],"coding":[94],"systems":[95],"phenotypes":[97,169,245],"(called":[98],"\"ontologies\"),":[99],"such":[100,136],"as":[101,203,278,280,496],"Human":[103],"Phenotype":[104,110,562,565,575],"Ontology":[105,111],"(HP)":[106],"Mammalian":[109],"(MP).":[112],"The":[113,141,215,302,363,384,404,436],"project":[115,142],"proposes":[116],"support":[118],"speed":[120],"up":[121,255],"curation":[122],"using":[123,246,369],"terms":[124],"discovered":[125],"directly":[126],"from":[127,161,351,356,449,474],"automatically":[132,326],"integrate":[133],"them":[134],"with":[135,287,422,487,511],"standard":[137],"ontologies.":[138],"<br>":[139,140,259,361,401,536],"seeks":[143],"harness":[145],"texts":[146],"extracting":[148],"statistically":[149],"associations":[151,323,354],"phenotypes,":[153],"genes.":[156],"Earlier":[157],"approaches":[158],"suffered":[160],"not":[162],"providing":[163],"deep":[164,175],"semantic":[165,390,399],"representations":[166],"they":[170],"tried":[171],"target.":[173],"Our":[174],"learning-based":[176],"approach":[177,216],"is":[178,420],"attempt":[180],"overcome":[182],"this":[183],"issue":[184],"reducing":[186],"uncertainty":[188],"textual":[190],"ontological":[192],"forms":[193],"Specifically,":[196],"model":[198,373],"treats":[199],"multitoken":[200],"named":[201,421,444],"entities":[202,303,395],"single":[205],"token":[206],"which":[207,442,450],"allows":[208],"more":[209],"reliable":[210],"handling":[211],"multiword":[213],"expressions.":[214],"builds":[217],"on":[218,329,376,389],"ground":[219],"breaking":[220],"research":[221],"at":[222],"European":[224],"Bininformatics":[225],"Institute":[226],"PI":[229],"(Nigel":[230],"Collier)":[231],"Co-investigator":[234],"(Damian":[235],"Smedley,":[236],"Queen":[237],"Mary":[238],"University":[239],"London),":[240],"including":[241],"terminology":[242],"alignment":[243],"pairwise":[247],"scoring":[248],"conceptual":[251],"elements":[252],"that":[253,324,411,493,525,546],"make":[254],"phenotype.":[257],"http://www.phenebank.org":[258],"dataset:</strong>":[261],"As":[262,537],"output":[264],"project,":[268],"we":[269,319,339],"release":[270,320],"set":[272],"24":[274],"million":[275],"MEDLINE":[276],"abstracts":[277],"well":[279],"3.8M":[281],"open-access":[282],"PMC":[283],"full":[284],"articles":[285,452],"annotated":[286],"9":[288],"classes":[289],"entity:":[291],"Phenotype,":[292],"Disease,":[293],"Anatomy,":[294],"Cell,":[295],"Cell_line,":[296],"GPR,":[297],"Gene_variant,":[298],"Molecule,":[299],"Pathway.":[301],"mapped":[306],"five":[308],"major":[309],"ontologies:":[310],"SNOMED,":[311],"HPO,":[312],"MeSH,":[313],"PRO,":[314],"FMA.":[316],"addition,":[318],"phenotype-disease":[322],"are":[325,412,443,457,494,503,528,543],"extracted":[327],"based":[328],"co-occurrences":[330],"statistics":[331],"Medline":[333],"abstracts.":[334],"Among":[335],"different":[336,475],"statistical":[337],"measures":[338],"evaluated,":[340],"Fisher":[342,578],"test":[343],"best":[344],"corresponded":[345],"known":[348],"tuples":[349],"available":[350,355],"curated":[353],"Monarch":[358],"Initiative":[359],"(https://monarchinitiative.org).":[360],"<strong>Processing:</strong>":[362],"NER":[364],"tagging":[365],"done":[368],"BiLSTM-CRF":[371],"neural":[372],"(https://github.com/pilehvar/phenebank)":[374],"trained":[375],"expert-annotated":[377],"(to":[379],"released":[381],"research).":[383],"grounding":[385],"ontologies":[387],"relies":[388],"embedding":[391],"concepts":[393,527],"unified":[398],"space.":[400],"<strong>Data":[402],"format:</strong>":[403],"<strong>PheneBank_Processed_PubMed.tar.gz</strong>":[405],"file":[406,419],"contains":[407,428],"24,359,010":[408],".txt":[409,418,465],"files":[410,438,459,466],"classified":[413],"into":[414],"812":[415],"directories.":[416],"Each":[417,484],"PubMed":[424],"article":[425,462],"ID":[426],"corresponding":[430],"article's":[431],"abstract":[432],"its":[434],"annotations.":[435],"<strong>PheneBank_Processed_PMC.tar.gz</strong>":[437],"6,180":[440],"directories":[441],"after":[445],"journal":[447],"titles":[448],"drawn.":[455],"There":[456],"three":[458],"per":[460],"each":[461],"(i.e.,":[463],"3":[464],"3,751,770":[469],"distinct":[470],"articles),":[471],"containing":[472],"parts":[476],"article:":[479],".title.txt,":[480],".abstract.txt,":[481],".body.txt.":[483],"line":[485,508],"starts":[486],"word;":[489],"those":[491],"words":[492],"identified":[495],"entities,":[497],"entity":[498],"type":[499],"mapping":[501,533],"information":[502],"followed":[504],"same":[507],"(tab":[509],"separated),":[510],"following":[513,550],"format:":[514],"word":[515],"&lt;TAB&gt;":[516,518,520],":::":[517],"entity_type":[519],"entity_concept_ID_1##confidence_score_1":[521],"entity_concept_ID_2##confidence_score_2":[522],"...":[523],"Note":[524],"sorted":[529],"according":[530],"confidence":[534],"scores.":[535],"<strong>PheneBank_Associations.tsv</strong>":[540],"file,":[541],"there":[542],"ten":[544],"columns":[545],"correspond":[547],"(left":[551],"right):":[553],"-":[554,557,561,564,568,571,574,577,580,582],"Disease":[555,558,572],"Name<br>":[556,563],"(MONDO)":[559],"ID<br>":[560,567],"(HPO)":[566],"Co-occurrence":[569],"Frequency<br>":[570,573,576],"(log)<br>":[579],"Dice<br>":[581],"Normalized":[583],"PMI":[584]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
