{"id":"https://openalex.org/W4393821053","doi":"https://doi.org/10.5281/zenodo.1422283","title":"Phenebank: Processed Medline Abstracts And Pmc Full Articles","display_name":"Phenebank: Processed Medline Abstracts And Pmc Full Articles","publication_year":2018,"publication_date":"2018-02-06","ids":{"openalex":"https://openalex.org/W4393821053","doi":"https://doi.org/10.5281/zenodo.1422283"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:1422283","is_oa":true,"landing_page_url":"https://zenodo.org/record/1422283","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/1422283","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112708286","display_name":"Nigel Collier","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Collier, Nigel","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110555562","display_name":"Mohammad Taher Pilehvar","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pilehvar, Mohammad Taher","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080121437","display_name":"Adam S. Bernard","orcid":"https://orcid.org/0000-0003-2699-5168"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bernard, Adam S.","raw_affiliation_strings":["Queen Mary University"],"affiliations":[{"raw_affiliation_string":"Queen Mary University","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009950897","display_name":"Damian Smedley","orcid":"https://orcid.org/0000-0002-5836-9850"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smedley, Damian","raw_affiliation_strings":["Queen Mary University"],"affiliations":[{"raw_affiliation_string":"Queen Mary University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112708286"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13417","display_name":"Biomedical Ethics and Regulation","score":0.05900000035762787,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T13417","display_name":"Biomedical Ethics and Regulation","score":0.05900000035762787,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T13984","display_name":"Nutrition, Genetics, and Disease","score":0.051500000059604645,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.610219419002533},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4681048095226288},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.37192559242248535},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.3388041853904724},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.32670509815216064},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1321692168712616}],"concepts":[{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.610219419002533},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4681048095226288},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37192559242248535},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.3388041853904724},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.32670509815216064},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1321692168712616},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:1422283","is_oa":true,"landing_page_url":"https://zenodo.org/record/1422283","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.1422283","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.1422283","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:1422283","is_oa":true,"landing_page_url":"https://zenodo.org/record/1422283","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W3031052312","https://openalex.org/W4389568370","https://openalex.org/W3032375762","https://openalex.org/W1995515455","https://openalex.org/W2080531066","https://openalex.org/W3108674512","https://openalex.org/W1506200166","https://openalex.org/W1489783725","https://openalex.org/W2148612803"],"abstract_inverted_index":{"<strong>The":[0,260],"PheneBank":[1,114,267],"project:</strong>":[2],"Free":[3],"text":[4,74,428],"scientific":[5],"literature":[6,129],"has":[7,82,322,394],"the":[8,20,50,54,73,102,108,128,168,187,197,223,228,233,250,256,266,271,384,401,406,423,433,461,467,481],"potential":[9],"to":[10,92,117,131,144,172,181,307,341,486],"be":[11,335],"an":[12,179,263],"incredibly":[13],"valuable":[14],"source":[15],"of":[16,58,63,89,167,194,212,244,249,265,273,290,347,432],"data":[17,333],"for":[18,52,96,147,337,422,445],"uncovering":[19],"often":[21],"hidden":[22],"relationships":[23],"between":[24,152,189],"genes,":[25],"diseases":[26,154],"and":[27,37,43,56,107,119,130,155,191,232,300,315,349,382,388,437,455],"phenotypes.":[28,195],"Phenotypic":[29],"descriptions":[30,48],"cover":[31],"abnormalities":[32],"in":[33,351,460],"anatomical":[34],"structures,":[35],"processes":[36],"behaviours.":[38],"For":[39],"example":[40],"'growth":[41],"delay'":[42],"'body":[44],"weight":[45],"loss'.":[46],"Such":[47],"form":[49],"basis":[51],"determining":[53],"existence":[55],"treatment":[57],"a":[59,86,204,326,352,378,443],"disease":[60],"but,":[61],"because":[62],"their":[64,487],"inherent":[65],"complexity,":[66],"have":[67,159,304,408],"previously":[68],"received":[69],"less":[70],"attention":[71],"by":[72,85,185,227],"mining":[75],"community.":[76],"In":[77],"recent":[78],"years,":[79],"significant":[80,150],"effort":[81],"been":[83,305,323,409],"spent":[84],"small":[87],"number":[88],"expert":[90],"curators":[91],"create":[93],"coding":[94],"systems":[95],"phenotypes":[97,169,245],"(called":[98],"\"ontologies\"),":[99],"such":[100,136],"as":[101,203,278,280,451],"Human":[103],"Phenotype":[104,110],"Ontology":[105,111],"(HP)":[106],"Mammalian":[109],"(MP).":[112],"The":[113,141,215,302,319,339,359,391],"project":[115,142],"proposes":[116],"support":[118],"speed":[120],"up":[121,255],"curation":[122],"using":[123,246,325],"terms":[124],"discovered":[125],"directly":[126],"from":[127,161,404,429],"automatically":[132],"integrate":[133],"them":[134],"with":[135,287,377,442,466],"standard":[137],"ontologies.":[138],"<br>":[139,140,259,317,356],"seeks":[143],"harness":[145],"texts":[146],"extracting":[148],"statistically":[149],"associations":[151],"phenotypes,":[153],"genes.":[156],"Earlier":[157],"approaches":[158],"suffered":[160],"not":[162],"providing":[163],"deep":[164,175],"semantic":[165,345,354],"representations":[166],"they":[170],"tried":[171],"target.":[173],"Our":[174],"learning-based":[176],"approach":[177,216],"is":[178,375],"attempt":[180],"overcome":[182],"this":[183],"issue":[184],"reducing":[186],"uncertainty":[188],"textual":[190],"ontological":[192],"forms":[193],"Specifically,":[196],"model":[198,329],"treats":[199],"multitoken":[200],"named":[201,376,399],"entities":[202,303,350],"single":[205],"token":[206],"which":[207,397,405],"allows":[208],"more":[209],"reliable":[210],"handling":[211],"multiword":[213],"expressions.":[214],"builds":[217],"on":[218,331,344],"ground":[219],"breaking":[220],"research":[221],"at":[222],"European":[224],"Bininformatics":[225],"Institute":[226],"PI":[229],"(Nigel":[230],"Collier)":[231],"Co-investigator":[234],"(Damian":[235],"Smedley,":[236],"Queen":[237],"Mary":[238],"University":[239],"London),":[240],"including":[241],"terminology":[242],"alignment":[243],"pairwise":[247],"scoring":[248],"conceptual":[251],"elements":[252],"that":[253,366,448,480],"make":[254],"phenotype.":[257],"https://sites.google.com/site/nhcollier/projects/phenebank":[258],"dataset:</strong>":[261],"As":[262],"output":[264],"project,":[268],"we":[269],"release":[270],"set":[272],"24":[274],"million":[275],"MEDLINE":[276],"abstracts":[277],"well":[279],"3.8M":[281],"open-access":[282],"PMC":[283],"full":[284],"articles":[285,407],"annotated":[286],"9":[288],"classes":[289],"entity:":[291],"Phenotype,":[292],"Disease,":[293],"Anatomy,":[294],"Cell,":[295],"Cell_line,":[296],"GPR,":[297],"Gene_variant,":[298],"Molecule,":[299],"Pathway.":[301],"mapped":[306],"five":[308],"major":[309],"ontologies:":[310],"SNOMED,":[311],"HPO,":[312],"MeSH,":[313],"PRO,":[314],"FMA.":[316],"<strong>Processing:</strong>":[318],"NER":[320],"tagging":[321],"done":[324],"BiLSTM-CRF":[327],"neural":[328],"trained":[330],"expert-annotated":[332],"(to":[334],"released":[336],"research).":[338],"grounding":[340],"ontologies":[342],"relies":[343],"embedding":[346],"concepts":[348,482],"unified":[353],"space.":[355],"<strong>Data":[357],"format:</strong>":[358],"\"PheneBank_Processed_PubMed.tar.gz\"":[360],"file":[361,374],"contains":[362,383],"24359010":[363],".txt":[364,373,420],"files":[365,393,414,421],"are":[367,398,412,449,458,483],"classified":[368],"into":[369],"812":[370],"directories.":[371],"Each":[372,439],"PubMed":[379],"article":[380,417],"ID":[381],"corresponding":[385],"article's":[386],"abstract":[387],"its":[389],"annotations.":[390],"\"PheneBank_Processed_PMC.tar.gz\"":[392],"6180":[395],"directories":[396],"after":[400],"journal":[402],"titles":[403],"drawn.":[410],"There":[411],"three":[413],"per":[415],"each":[416],"(i.e.,":[418],"3":[419],"3751770":[424],"distinct":[425],"articles),":[426],"containing":[427],"different":[430],"parts":[431],"article:":[434],".title.txt,":[435],".abstract.txt,":[436],".body.txt.":[438],"line":[440,463],"starts":[441],"word;":[444],"those":[446],"words":[447],"identified":[450],"entities,":[452],"entity":[453],"type":[454],"mapping":[456,488],"information":[457],"followed":[459],"same":[462],"(tab":[464],"separated),":[465],"following":[468],"format:":[469],"word":[470],"&lt;TAB&gt;":[471,473,475],":::":[472],"entity_type":[474],"entity_concept_ID_1##confidence_score_1":[476],"entity_concept_ID_2##confidence_score_2":[477],"...":[478],"Note":[479],"sorted":[484],"according":[485],"confidence":[489],"scores.":[490]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
