{"id":"https://openalex.org/W2121157913","doi":"https://doi.org/10.1093/database/bau056","title":"Natural language processing pipelines to annotate BioC collections with an application to the NCBI disease corpus","display_name":"Natural language processing pipelines to annotate BioC collections with an application to the NCBI disease corpus","publication_year":2014,"publication_date":"2014-06-16","ids":{"openalex":"https://openalex.org/W2121157913","doi":"https://doi.org/10.1093/database/bau056","mag":"2121157913","pmid":"https://pubmed.ncbi.nlm.nih.gov/24935050"},"language":"en","primary_location":{"id":"doi:10.1093/database/bau056","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/bau056","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bau056/8246294/bau056.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bau056/8246294/bau056.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111968060","display_name":"D. C. Comeau","orcid":null},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"D. C. Comeau","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA comeau@ncbi.nlm.nih.gov","National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA comeau@ncbi.nlm.nih.gov","institution_ids":["https://openalex.org/I4210109390"]},{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602439","display_name":"Haibo Liu","orcid":"https://orcid.org/0000-0002-4213-2883"},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H. Liu","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"middle","author":{"id":null,"display_name":"R. Islamaj Do\u00a0an","orcid":null},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"R. Islamaj Do\u00a0an","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111895206","display_name":"W. John Wilbur","orcid":null},"institutions":[{"id":"https://openalex.org/I1299303238","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88","country_code":"US","type":"government","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238"]},{"id":"https://openalex.org/I4210109390","display_name":"National Center for Biotechnology Information","ror":"https://ror.org/02meqm098","country_code":"US","type":"facility","lineage":["https://openalex.org/I1299022934","https://openalex.org/I1299303238","https://openalex.org/I2800548410","https://openalex.org/I4210109390"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W. J. Wilbur","raw_affiliation_strings":["National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"National Center for Biotechnology Information, National Library of Medicine, National Institutes of Health, 8600 Rockville Pike, Bethesda, MD 20894, USA","institution_ids":["https://openalex.org/I4210109390","https://openalex.org/I1299303238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5111968060"],"corresponding_institution_ids":["https://openalex.org/I1299303238","https://openalex.org/I4210109390"],"apc_list":{"value":1415,"currency":"GBP","value_usd":1735},"apc_paid":{"value":1415,"currency":"GBP","value_usd":1735},"fwci":1.2804,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.79693685,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"2014","issue":"0","first_page":"bau056","last_page":"bau056"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.9828000068664551,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.973800003528595,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7438099980354309},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5279749035835266},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4572247564792633},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4087337255477905}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7438099980354309},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5279749035835266},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4572247564792633},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4087337255477905}],"mesh":[{"descriptor_ui":"D004194","descriptor_name":"Disease","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D004194","descriptor_name":"Disease","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D004194","descriptor_name":"Disease","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D014481","descriptor_name":"United States","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014481","descriptor_name":"United States","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D014481","descriptor_name":"United States","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D064229","descriptor_name":"Biological Ontologies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D064229","descriptor_name":"Biological Ontologies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D064229","descriptor_name":"Biological Ontologies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1093/database/bau056","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/bau056","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bau056/8246294/bau056.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},{"id":"pmid:24935050","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/24935050","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database : the journal of biological databases and curation","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:4058794","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/4058794","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Database (Oxford)","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1093/database/bau056","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/bau056","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/bau056/8246294/bau056.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7400000095367432}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W241117969","https://openalex.org/W1558788122","https://openalex.org/W1600348603","https://openalex.org/W1648311451","https://openalex.org/W1964670939","https://openalex.org/W1966446268","https://openalex.org/W1969245183","https://openalex.org/W2020357318","https://openalex.org/W2044833006","https://openalex.org/W2064299012","https://openalex.org/W2097606805","https://openalex.org/W2097960255","https://openalex.org/W2101289003","https://openalex.org/W2102563561","https://openalex.org/W2104148262","https://openalex.org/W2107435951","https://openalex.org/W2109591242","https://openalex.org/W2121844933","https://openalex.org/W2127241285","https://openalex.org/W2130596747","https://openalex.org/W2132288458","https://openalex.org/W2134967412","https://openalex.org/W2142016317","https://openalex.org/W2149660837","https://openalex.org/W2151532143","https://openalex.org/W2152966407","https://openalex.org/W2169099542","https://openalex.org/W2178441628","https://openalex.org/W2251104810","https://openalex.org/W2251108261","https://openalex.org/W2251988705","https://openalex.org/W2438784799","https://openalex.org/W2738143918","https://openalex.org/W2893578842","https://openalex.org/W2916147465"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W1596801655"],"abstract_inverted_index":{"BioC":[0,17,67,71,86],"is":[1],"a":[2],"new":[3],"format":[4],"and":[5,12,28,38,55,88,102],"associated":[6],"code":[7],"libraries":[8],"for":[9],"sharing":[10],"text":[11,73],"annotations.":[13],"We":[14],"have":[15,91],"implemented":[16],"natural":[18,40],"language":[19,41],"preprocessing":[20],"pipelines":[21,59,90],"in":[22],"two":[23],"popular":[24],"programming":[25],"languages:":[26],"C++":[27],"Java.":[29],"The":[30,45],"current":[31],"implementations":[32],"interface":[33],"with":[34,65],"the":[35,81,89],"well-known":[36],"MedPost":[37],"Stanford":[39],"processing":[42],"tool":[43],"sets.":[44],"pipeline":[46],"functionality":[47],"includes":[48],"sentence":[49,56],"segmentation,":[50],"tokenization,":[51],"part-of-speech":[52],"tagging,":[53],"lemmatization":[54],"parsing.":[57],"These":[58],"can":[60,104],"be":[61,105],"easily":[62],"integrated":[63],"along":[64],"other":[66],"programs":[68],"into":[69],"any":[70],"compliant":[72],"mining":[74],"systems.":[75],"As":[76],"an":[77],"application,":[78],"we":[79],"converted":[80],"NCBI":[82],"disease":[83],"corpus":[84,96],"to":[85,97],"format,":[87],"successfully":[92],"run":[93],"on":[94],"this":[95],"demonstrate":[98],"their":[99],"functionality.":[100],"Code":[101],"data":[103],"downloaded":[106],"from":[107],"http://bioc.sourceforge.net.":[108,111],"Database":[109],"URL:":[110]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2}],"updated_date":"2026-05-19T21:40:30.786675","created_date":"2025-10-10T00:00:00"}
