{"id":"https://openalex.org/W2111303602","doi":"https://doi.org/10.1093/database/bas026","title":"Improving links between literature and biological data with text mining: a case study with GEO, PDB and MEDLINE","display_name":"Improving links between literature and biological data with text mining: a case study with GEO, PDB and MEDLINE","publication_year":2012,"publication_date":"2012-01-01","ids":{"openalex":"https://openalex.org/W2111303602","doi":"https://doi.org/10.1093/database/bas026","mag":"2111303602","pmid":"https://pubmed.ncbi.nlm.nih.gov/22685160"},"language":"en","primary_location":{"id":"doi:10.1093/database/bas026","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/bas026","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bas026/16728781/bas026.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bas026/16728781/bas026.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109241617","display_name":"Aur\u00e9lie N\u00e9v\u00e9ol","orcid":null},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aur\u00e9lie N\u00e9v\u00e9ol","raw_affiliation_strings":["National Center for Biotechnology Information, U.S. National Library of Medicine, Bethesda, MD 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, U.S. National Library of Medicine, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111895206","display_name":"W. John Wilbur","orcid":null},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W. John Wilbur","raw_affiliation_strings":["National Center for Biotechnology Information, U.S. National Library of Medicine, Bethesda, MD 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, U.S. National Library of Medicine, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083081872","display_name":"Zhiyong Lu","orcid":"https://orcid.org/0000-0001-9998-916X"},"institutions":[{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiyong Lu","raw_affiliation_strings":["National Center for Biotechnology Information, U.S. National Library of Medicine, Bethesda, MD 20894, USA"],"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, U.S. National Library of Medicine, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5109241617"],"corresponding_institution_ids":["https://openalex.org/I4210109390"],"apc_list":{"value":1415,"currency":"GBP","value_usd":1735},"apc_paid":{"value":1415,"currency":"GBP","value_usd":1735},"fwci":2.0131,"has_fulltext":true,"cited_by_count":24,"citation_normalized_percentile":{"value":0.86589835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"2012","issue":null,"first_page":"bas026","last_page":"bas026"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T13937","display_name":"Genetics, Bioinformatics, and Biomedical Research","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-curation","display_name":"Data curation","score":0.7698420286178589},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7322858572006226},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6474922299385071},{"id":"https://openalex.org/keywords/biological-database","display_name":"Biological database","score":0.5932259559631348},{"id":"https://openalex.org/keywords/protein-data-bank","display_name":"Protein Data Bank (RCSB PDB)","score":0.5320575833320618},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5125955939292908},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.499711275100708},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.492047518491745},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.4901798367500305},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.480421781539917},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.46615374088287354},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4230611026287079},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4057621359825134},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3611242473125458},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.3386768698692322},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.10454532504081726}],"concepts":[{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.7698420286178589},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7322858572006226},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6474922299385071},{"id":"https://openalex.org/C20901353","wikidata":"https://www.wikidata.org/wiki/Q4117139","display_name":"Biological database","level":2,"score":0.5932259559631348},{"id":"https://openalex.org/C65556437","wikidata":"https://www.wikidata.org/wiki/Q766195","display_name":"Protein Data Bank (RCSB PDB)","level":2,"score":0.5320575833320618},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5125955939292908},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.499711275100708},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.492047518491745},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.4901798367500305},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.480421781539917},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.46615374088287354},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4230611026287079},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4057621359825134},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3611242473125458},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.3386768698692322},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.10454532504081726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000043","descriptor_name":"Abstracting and Indexing","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D000043","descriptor_name":"Abstracting and Indexing","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D000043","descriptor_name":"Abstracting and Indexing","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016206","descriptor_name":"Databases, Bibliographic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016239","descriptor_name":"MEDLINE","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016239","descriptor_name":"MEDLINE","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016239","descriptor_name":"MEDLINE","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000592","qualifier_name":"standards","is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1093/database/bas026","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/bas026","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bas026/16728781/bas026.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},{"id":"pmid:22685160","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/22685160","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database : the journal of biological databases and curation","raw_type":null},{"id":"pmh:oai:europepmc.org:2382209","is_oa":false,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/3371192","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1093/database/bas026","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/bas026","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bas026/16728781/bas026.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6100000143051147,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G6336628147","display_name":null,"funder_award_id":"00002","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2111303602.pdf","grobid_xml":"https://content.openalex.org/works/W2111303602.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W1490056294","https://openalex.org/W1550258693","https://openalex.org/W1966284537","https://openalex.org/W1977008606","https://openalex.org/W2005958938","https://openalex.org/W2013607137","https://openalex.org/W2020541351","https://openalex.org/W2024906032","https://openalex.org/W2027943492","https://openalex.org/W2040610660","https://openalex.org/W2045016337","https://openalex.org/W2053673723","https://openalex.org/W2057240659","https://openalex.org/W2074640468","https://openalex.org/W2075322787","https://openalex.org/W2097052916","https://openalex.org/W2097553584","https://openalex.org/W2099369363","https://openalex.org/W2100676408","https://openalex.org/W2105812764","https://openalex.org/W2123551262","https://openalex.org/W2124320575","https://openalex.org/W2130479394","https://openalex.org/W2152398020","https://openalex.org/W2155461328","https://openalex.org/W2971629451","https://openalex.org/W6629328318","https://openalex.org/W6632766574","https://openalex.org/W6641858494","https://openalex.org/W6678149484","https://openalex.org/W6767662938"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W3115833764","https://openalex.org/W4226117346","https://openalex.org/W2991044277","https://openalex.org/W1548256585","https://openalex.org/W2636040319","https://openalex.org/W1564146404","https://openalex.org/W604846123","https://openalex.org/W2495737905","https://openalex.org/W2777926110"],"abstract_inverted_index":{"High-throughput":[0],"experiments":[1],"and":[2,22,29,62,82,93,106,121,135,138,148,169],"bioinformatics":[3],"techniques":[4],"are":[5,13,38,66],"creating":[6],"an":[7],"exploding":[8],"volume":[9],"of":[10,19,34,88,115,167,185,193],"data":[11,37],"that":[12,125,139,153,172],"becoming":[14],"overwhelming":[15],"to":[16,26,161,181],"keep":[17],"track":[18],"for":[20,51,59,146],"biologists":[21],"researchers":[23],"who":[24],"need":[25],"access,":[27],"analyze":[28],"process":[30],"existing":[31],"data.":[32],"Much":[33],"the":[35,46,54,86,94,112,126,133,165,183,191],"available":[36,195],"being":[39,68],"deposited":[40],"in":[41,73,76],"specialized":[42],"databases,":[43],"such":[44,79],"as":[45,80,188,190],"Gene":[47],"Expression":[48],"Omnibus":[49],"(GEO)":[50],"microarrays":[52],"or":[53],"Protein":[55],"Data":[56,64],"Bank":[57],"(PDB)":[58],"protein":[60],"structures":[61],"coordinates.":[63],"sets":[65],"also":[67],"described":[69],"by":[70],"their":[71],"authors":[72],"publications":[74],"archived":[75],"literature":[77,95],"databases":[78,92,136,198],"MEDLINE":[81],"PubMed":[83],"Central.":[84],"Currently,":[85],"curation":[87,117,128],"links":[89],"between":[90,118,141],"biological":[91],"mainly":[96],"relies":[97],"on":[98,132],"manual":[99],"labour,":[100],"which":[101],"makes":[102],"it":[103],"a":[104,176],"time-consuming":[105],"daunting":[107],"task.":[108],"Herein,":[109],"we":[110,151,178],"analysed":[111],"current":[113],"state":[114],"link":[116,127],"GEO,":[119],"PDB":[120,147],"MEDLINE.":[122],"We":[123],"found":[124],"is":[129,143],"heterogeneous":[130],"depending":[131],"sources":[134,142],"involved,":[137],"overlap":[140],"low,":[144],"<50%":[145],"GEO.":[149],"Furthermore,":[150],"showed":[152],"text-mining":[154],"tools":[155],"can":[156],"automatically":[157],"provide":[158],"valuable":[159],"evidence":[160],"help":[162],"curators":[163],"broaden":[164],"scope":[166],"articles":[168],"database":[170],"entries":[171],"they":[173],"review.":[174],"As":[175],"result,":[177],"made":[179],"recommendations":[180],"improve":[182],"coverage":[184],"curated":[186],"links,":[187],"well":[189],"consistency":[192],"information":[194],"from":[196],"different":[197],"while":[199],"maintaining":[200],"high-quality":[201],"curation.":[202],"Database":[203],"URLs:":[204],"http://www.ncbi.nlm.nih.gov/PubMed,":[205],"http://www.ncbi.nlm.nih.gov/geo/,":[206],"http://www.rcsb.org/pdb/":[207]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
