{"id":"https://openalex.org/W3217298854","doi":"https://doi.org/10.5281/zenodo.1167696","title":"Phenebank: Processed Medline Abstracts","display_name":"Phenebank: Processed Medline Abstracts","publication_year":2018,"publication_date":"2018-02-06","ids":{"openalex":"https://openalex.org/W3217298854","doi":"https://doi.org/10.5281/zenodo.1167696","mag":"3217298854"},"language":"en","primary_location":{"id":"pmh:oai:figshare.com:article/11548938","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112708286","display_name":"Nigel Collier","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Collier, Nigel","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110555562","display_name":"Mohammad Taher Pilehvar","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Pilehvar, Mohammad Taher","raw_affiliation_strings":["University of Cambridge"],"affiliations":[{"raw_affiliation_string":"University of Cambridge","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080121437","display_name":"Adam S. Bernard","orcid":"https://orcid.org/0000-0003-2699-5168"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bernard, Adam S.","raw_affiliation_strings":["Queen Mary University"],"affiliations":[{"raw_affiliation_string":"Queen Mary University","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009950897","display_name":"Damian Smedley","orcid":"https://orcid.org/0000-0002-5836-9850"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smedley, Damian","raw_affiliation_strings":["Queen Mary University"],"affiliations":[{"raw_affiliation_string":"Queen Mary University","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5112708286"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.31049999594688416,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.31049999594688416,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.5748616456985474},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.36074697971343994},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3254181742668152},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.09923499822616577}],"concepts":[{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.5748616456985474},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36074697971343994},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3254181742668152},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.09923499822616577},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:figshare.com:article/11548938","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},{"id":"pmh:oai:zenodo.org:1167696","is_oa":true,"landing_page_url":"https://zenodo.org/record/1167696","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.1167696","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.1167696","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/11548938","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Dataset"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/2","display_name":"Zero hunger","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"<strong>The":[0,260],"PheneBank":[1,114,267],"project:</strong>":[2],"Free":[3],"text":[4,74],"scientific":[5],"literature":[6,129],"has":[7,82,314],"the":[8,20,50,54,73,102,108,128,168,187,197,223,228,233,250,256,266,271,376,405,411,425],"potential":[9],"to":[10,92,117,131,144,172,181,299,333,430],"be":[11,327],"an":[12,179,263],"incredibly":[13],"valuable":[14],"source":[15],"of":[16,58,63,89,167,194,212,244,249,265,273,282,339],"data":[17,325],"for":[18,52,96,147,329,389],"uncovering":[19],"often":[21],"hidden":[22],"relationships":[23],"between":[24,152,189],"genes,":[25],"diseases":[26,154],"and":[27,37,43,56,107,119,130,155,191,232,292,307,341,374,380,399],"phenotypes.":[28,195],"Phenotypic":[29],"descriptions":[30,48],"cover":[31],"abnormalities":[32],"in":[33,343,404],"anatomical":[34],"structures,":[35],"processes":[36],"behaviours.":[38],"For":[39],"example":[40],"'growth":[41],"delay'":[42],"'body":[44],"weight":[45],"loss'.":[46],"Such":[47],"form":[49],"basis":[51],"determining":[53],"existence":[55],"treatment":[57],"a":[59,86,204,318,344,370,387],"disease":[60],"but,":[61],"because":[62],"their":[64,431],"inherent":[65],"complexity,":[66],"have":[67,159,296],"previously":[68],"received":[69],"less":[70],"attention":[71],"by":[72,85,185,227],"mining":[75],"community.":[76],"In":[77],"recent":[78],"years,":[79],"significant":[80,150],"effort":[81],"been":[83,297,315],"spent":[84],"small":[87],"number":[88],"expert":[90],"curators":[91],"create":[93],"coding":[94],"systems":[95],"phenotypes":[97,169,245],"(called":[98],"\"ontologies\"),":[99],"such":[100,136],"as":[101,203,395],"Human":[103],"Phenotype":[104,110],"Ontology":[105,111],"(HP)":[106],"Mammalian":[109],"(MP).":[112],"The":[113,141,215,294,311,331,351],"project":[115,142],"proposes":[116],"support":[118],"speed":[120],"up":[121,255],"curation":[122],"using":[123,246,317],"terms":[124],"discovered":[125],"directly":[126],"from":[127,161],"automatically":[132],"integrate":[133],"them":[134],"with":[135,279,369,386,410],"standard":[137],"ontologies.":[138],"<br>":[139,140,259,309,348],"seeks":[143],"harness":[145],"texts":[146],"extracting":[148],"statistically":[149],"associations":[151],"phenotypes,":[153],"genes.":[156],"Earlier":[157],"approaches":[158],"suffered":[160],"not":[162],"providing":[163],"deep":[164,175],"semantic":[165,337,346],"representations":[166],"they":[170],"tried":[171],"target.":[173],"Our":[174],"learning-based":[176],"approach":[177,216],"is":[178,367],"attempt":[180],"overcome":[182],"this":[183],"issue":[184],"reducing":[186],"uncertainty":[188],"textual":[190],"ontological":[192],"forms":[193],"Specifically,":[196],"model":[198,321],"treats":[199],"multitoken":[200],"named":[201,368],"entities":[202,295,342],"single":[205],"token":[206],"which":[207],"allows":[208],"more":[209],"reliable":[210],"handling":[211],"multiword":[213],"expressions.":[214],"builds":[217],"on":[218,323,336],"ground":[219],"breaking":[220],"research":[221],"at":[222],"European":[224],"Bininformatics":[225],"Institute":[226],"PI":[229],"(Nigel":[230],"Collier)":[231],"Co-investigator":[234],"(Damian":[235],"Smedley,":[236],"Queen":[237],"Mary":[238],"University":[239],"London),":[240],"including":[241],"terminology":[242],"alignment":[243],"pairwise":[247],"scoring":[248],"conceptual":[251],"elements":[252],"that":[253,358,392,424],"make":[254],"phenotype.":[257],"https://sites.google.com/site/nhcollier/projects/phenebank":[258],"dataset:</strong>":[261],"As":[262],"output":[264],"project,":[268],"we":[269],"release":[270],"set":[272],"24":[274],"million":[275],"MEDLINE":[276],"abstracts":[277],"annotated":[278],"9":[280],"classes":[281],"entity:":[283],"Phenotype,":[284],"Disease,":[285],"Anatomy,":[286],"Cell,":[287],"Cell_line,":[288],"GPR,":[289],"Gene_variant,":[290],"Molecule,":[291],"Pathway.":[293],"mapped":[298],"five":[300],"major":[301],"ontologies:":[302],"SNOMED,":[303],"HPO,":[304],"MeSH,":[305],"PRO,":[306],"FMA.":[308],"<strong>Processing:</strong>":[310],"NER":[312],"tagging":[313],"done":[316],"BiLSTM-CRF":[319],"neural":[320],"trained":[322],"expert-annotated":[324],"(to":[326],"released":[328],"research).":[330],"grounding":[332],"ontologies":[334],"relies":[335],"embedding":[338],"concepts":[340,426],"unified":[345],"space.":[347],"<strong>Data":[349],"format:</strong>":[350],"zip":[352],"file":[353,366],"contains":[354,375],"24359010":[355],".txt":[356,365],"files":[357],"are":[359,393,402,427],"classified":[360],"into":[361],"812":[362],"directories.":[363],"Each":[364,383],"PubMed":[371],"article":[372],"ID":[373],"corresponding":[377],"article's":[378],"abstract":[379],"its":[381],"annotations.":[382],"line":[384,407],"starts":[385],"word;":[388],"those":[390],"words":[391],"identified":[394],"entities,":[396],"entity":[397],"type":[398],"mapping":[400,432],"information":[401],"followed":[403],"same":[406],"(tab":[408],"separated),":[409],"following":[412],"format:":[413],"word":[414],"&lt;TAB&gt;":[415,417,419],":::":[416],"entity_type":[418],"entity_concept_ID_1##confidence_score_1":[420],"entity_concept_ID_2##confidence_score_2":[421],"...":[422],"Note":[423],"sorted":[428],"according":[429],"confidence":[433],"scores.":[434]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
