{"id":"https://openalex.org/W2147754794","doi":"https://doi.org/10.1186/1471-2105-9-s1-s7","title":"Rule-based knowledge aggregation for large-scale protein sequence analysis of influenza A viruses","display_name":"Rule-based knowledge aggregation for large-scale protein sequence analysis of influenza A viruses","publication_year":2008,"publication_date":"2008-02-01","ids":{"openalex":"https://openalex.org/W2147754794","doi":"https://doi.org/10.1186/1471-2105-9-s1-s7","mag":"2147754794","pmid":"https://pubmed.ncbi.nlm.nih.gov/18315860"},"language":"en","primary_location":{"id":"doi:10.1186/1471-2105-9-s1-s7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-9-s1-s7","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-S1-S7","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-S1-S7","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052502655","display_name":"Olivo Miotto","orcid":"https://orcid.org/0000-0001-8060-6771"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Olivo Miotto","raw_affiliation_strings":["Institute of Systems Science, National University of Singapore, 25 Heng Mui Keng Terrace, Singapore. olivo@nus.edu.sg","Institute of Systems Science, National University of Singapore, 25 Heng Mui Keng Terrace, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Systems Science, National University of Singapore, 25 Heng Mui Keng Terrace, Singapore. olivo@nus.edu.sg","institution_ids":["https://openalex.org/I165932596"]},{"raw_affiliation_string":"Institute of Systems Science, National University of Singapore, 25 Heng Mui Keng Terrace, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108317608","display_name":"Tin Wee Tan","orcid":"https://orcid.org/0009-0008-7829-3647"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tin Wee Tan","raw_affiliation_strings":["Department of Biochemistry, Yong Loo Lin School of Medicine, National University of Singapore, 8 Medical Drive, Singapore"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Biochemistry, Yong Loo Lin School of Medicine, National University of Singapore, 8 Medical Drive, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089063979","display_name":"Vladimir Brusi\u0107","orcid":"https://orcid.org/0000-0003-0523-5266"},"institutions":[{"id":"https://openalex.org/I4210117453","display_name":"Dana-Farber Cancer Institute","ror":"https://ror.org/02jzgtq86","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210117453"]},{"id":"https://openalex.org/I165143802","display_name":"University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU","US"],"is_corresponding":false,"raw_author_name":"Vladimir Brusic","raw_affiliation_strings":["Cancer Vaccine Center, Dana-Farber Cancer Institute, 77 Avenue Louis Pasteur, Boston, USA","School of Land, Crop, and Food Sciences, University of Queensland, Brisbane, 4072, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cancer Vaccine Center, Dana-Farber Cancer Institute, 77 Avenue Louis Pasteur, Boston, USA","institution_ids":["https://openalex.org/I4210117453"]},{"raw_affiliation_string":"School of Land, Crop, and Food Sciences, University of Queensland, Brisbane, 4072, Australia","institution_ids":["https://openalex.org/I165143802"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052502655"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":{"value":1690,"currency":"GBP","value_usd":2072},"apc_paid":{"value":1690,"currency":"GBP","value_usd":2072},"fwci":0.7011,"has_fulltext":true,"cited_by_count":31,"citation_normalized_percentile":{"value":0.71043652,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"9","issue":"S1","first_page":"S7","last_page":"S7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.7145000100135803,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.7145000100135803,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10167","display_name":"Influenza Virus Research Studies","score":0.11699999868869781,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11642","display_name":"Genomics and Rare Diseases","score":0.016699999570846558,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.6139655113220215},{"id":"https://openalex.org/keywords/sequence-analysis","display_name":"Sequence analysis","score":0.570572018623352},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5235354900360107},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.49819231033325195},{"id":"https://openalex.org/keywords/virology","display_name":"Virology","score":0.45147505402565},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4391598105430603},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.37866348028182983},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.3774293065071106},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.33132678270339966},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.264782190322876},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.19038188457489014},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.12136167287826538},{"id":"https://openalex.org/keywords/gene-expression","display_name":"Gene expression","score":0.09880557656288147}],"concepts":[{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.6139655113220215},{"id":"https://openalex.org/C61053724","wikidata":"https://www.wikidata.org/wiki/Q1154615","display_name":"Sequence analysis","level":3,"score":0.570572018623352},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5235354900360107},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.49819231033325195},{"id":"https://openalex.org/C159047783","wikidata":"https://www.wikidata.org/wiki/Q7215","display_name":"Virology","level":1,"score":0.45147505402565},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4391598105430603},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.37866348028182983},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.3774293065071106},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.33132678270339966},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.264782190322876},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.19038188457489014},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.12136167287826538},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.09880557656288147},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000595","descriptor_name":"Amino Acid Sequence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D001185","descriptor_name":"Artificial Intelligence","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D003628","descriptor_name":"Database Management Systems","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D008969","descriptor_name":"Molecular Sequence Data","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008969","descriptor_name":"Molecular Sequence Data","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D008969","descriptor_name":"Molecular Sequence Data","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009980","descriptor_name":"Influenza A virus","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D009980","descriptor_name":"Influenza A virus","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D009980","descriptor_name":"Influenza A virus","qualifier_ui":"Q000378","qualifier_name":"metabolism","is_major_topic":false},{"descriptor_ui":"D014764","descriptor_name":"Viral Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D014764","descriptor_name":"Viral Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D014764","descriptor_name":"Viral Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D015201","descriptor_name":"Meta-Analysis as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015201","descriptor_name":"Meta-Analysis as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D015201","descriptor_name":"Meta-Analysis as Topic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D020539","descriptor_name":"Sequence Analysis, Protein","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D030562","descriptor_name":"Databases, Protein","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":8,"locations":[{"id":"doi:10.1186/1471-2105-9-s1-s7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-9-s1-s7","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-S1-S7","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},{"id":"pmid:18315860","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/18315860","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC bioinformatics","raw_type":null},{"id":"pmh:oai:scholarbank.nus.edu.sg:10635/28916","is_oa":false,"landing_page_url":"http://scholarbank.nus.edu.sg/handle/10635/28916","pdf_url":null,"source":{"id":"https://openalex.org/S7407052290","display_name":"National University of Singapore","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Scopus","raw_type":"Article"},{"id":"pmh:oai:doaj.org/article:9a9f381ad60d47d58deabcff03aa08f2","is_oa":false,"landing_page_url":"https://doaj.org/article/9a9f381ad60d47d58deabcff03aa08f2","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics, Vol 9, Iss Suppl 1, p S7 (2008)","raw_type":"article"},{"id":"pmh:oai:espace.library.uq.edu.au:UQ:315703","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306402388","display_name":"Queensland's institutional digital repository (The University of Queensland)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I165143802","host_organization_name":"The University of Queensland","host_organization_lineage":["https://openalex.org/I165143802"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"},{"id":"pmh:oai:ora.ox.ac.uk:uuid:0f31a416-1f93-4104-96de-c58e0b85452f","is_oa":true,"landing_page_url":"https://ora.ox.ac.uk/objects/uuid:0f31a416-1f93-4104-96de-c58e0b85452f","pdf_url":null,"source":{"id":"https://openalex.org/S4306402636","display_name":"Oxford University Research Archive (ORA) (University of Oxford)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40120149","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/I40120149"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"PubMed (http://www.ncbi.nlm.nih.gov/pubmed/)","raw_type":"Journal article"},{"id":"pmh:oai:ora.ox.ac.uk:uuid:eec53041-1125-4d76-a351-fd18a5afe3fe","is_oa":false,"landing_page_url":"https://ora.ox.ac.uk/objects/uuid:eec53041-1125-4d76-a351-fd18a5afe3fe","pdf_url":null,"source":{"id":"https://openalex.org/S4306402636","display_name":"Oxford University Research Archive (ORA) (University of Oxford)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I40120149","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/I40120149"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Symplectic Elements at Oxford","raw_type":"Journal article"},{"id":"pmh:oai:pubmedcentral.nih.gov:2259408","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2259408","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"BMC Bioinformatics","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/1471-2105-9-s1-s7","is_oa":true,"landing_page_url":"https://doi.org/10.1186/1471-2105-9-s1-s7","pdf_url":"https://bmcbioinformatics.biomedcentral.com/counter/pdf/10.1186/1471-2105-9-S1-S7","source":{"id":"https://openalex.org/S19032547","display_name":"BMC Bioinformatics","issn_l":"1471-2105","issn":["1471-2105"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"BMC Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4099999964237213,"display_name":"Responsible consumption and production","id":"https://metadata.un.org/sdg/12"}],"awards":[{"id":"https://openalex.org/G1457133460","display_name":null,"funder_award_id":"U19 AI56541","funder_id":"https://openalex.org/F4320337355","funder_display_name":"National Institute of Allergy and Infectious Diseases"}],"funders":[{"id":"https://openalex.org/F4320306085","display_name":"U.S. Department of Health and Human Services","ror":"https://ror.org/033jnv181"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337355","display_name":"National Institute of Allergy and Infectious Diseases","ror":"https://ror.org/043z4tv69"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2147754794.pdf","grobid_xml":"https://content.openalex.org/works/W2147754794.grobid-xml"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W174628062","https://openalex.org/W181537133","https://openalex.org/W1487060125","https://openalex.org/W1499898004","https://openalex.org/W1502374275","https://openalex.org/W1552138711","https://openalex.org/W1562643504","https://openalex.org/W1965014786","https://openalex.org/W1974833742","https://openalex.org/W1996599952","https://openalex.org/W2005754100","https://openalex.org/W2022679230","https://openalex.org/W2060333218","https://openalex.org/W2083752694","https://openalex.org/W2091526576","https://openalex.org/W2098098151","https://openalex.org/W2104568825","https://openalex.org/W2107247043","https://openalex.org/W2115899778","https://openalex.org/W2133034723","https://openalex.org/W2166582607","https://openalex.org/W2916858871","https://openalex.org/W3191676131","https://openalex.org/W4239696231","https://openalex.org/W4245145908","https://openalex.org/W4285719527","https://openalex.org/W6609930034","https://openalex.org/W7053032915"],"related_works":["https://openalex.org/W2415545834","https://openalex.org/W2069695727","https://openalex.org/W1802639773","https://openalex.org/W2118505788","https://openalex.org/W4254166511","https://openalex.org/W2150084125","https://openalex.org/W1542823905","https://openalex.org/W1490878237","https://openalex.org/W2990314835","https://openalex.org/W2112284452"],"abstract_inverted_index":{"BACKGROUND:":[0],"The":[1],"explosive":[2],"growth":[3],"of":[4,15,24,26,59,66,79,97,134,183,193,230,250,300,318],"biological":[5,53],"data":[6],"provides":[7],"opportunities":[8],"for":[9,75,287,312],"new":[10],"statistical":[11],"and":[12,38,49,61,93,106,132,164,233],"comparative":[13],"analyses":[14],"large":[16],"information":[17,148],"sets,":[18],"such":[19,29],"as":[20],"alignments":[21],"comprising":[22],"tens":[23],"thousands":[25,78],"sequences.":[27],"In":[28,81],"studies,":[30],"sequence":[31],"annotations":[32],"frequently":[33],"play":[34],"an":[35,98],"essential":[36],"role,":[37],"reliable":[39],"results":[40,291],"depend":[41],"on":[42,298],"metadata":[43,100,252],"quality.":[44],"However,":[45],"the":[46,57,95,242,272,310,319],"semantic":[47,107,194,231,322],"heterogeneity":[48],"annotation":[50],"inconsistencies":[51],"in":[52,189,276],"databases":[54,212],"greatly":[55],"increase":[56],"complexity":[58],"aggregating":[60],"cleaning":[62],"metadata.":[63],"Manual":[64],"curation":[65,307],"datasets,":[67],"traditionally":[68],"favoured":[69],"by":[70,146,170],"life":[71],"scientists,":[72],"is":[73,285],"impractical":[74],"studies":[76],"involving":[77],"records.":[80,320],"this":[82,111,341],"study,":[83],"we":[84,236],"investigate":[85],"quality":[86,209,273],"issues":[87,274],"that":[88,103,293],"affect":[89],"major":[90],"public":[91,156,277],"databases,":[92,278],"quantify":[94],"effectiveness":[96],"automated":[99,279],"extraction":[101],"approach":[102,112],"combines":[104],"structural":[105,173,203],"rules.":[108,174],"We":[109],"applied":[110],"to":[113,120,200,205,263,315,327,336,340],"more":[114,150,219,258],"than":[115,151,221,259],"90,000":[116,152],"influenza":[117],"A":[118,141],"records,":[119,184],"annotate":[121],"sequences":[122,143,240],"with":[123,185,282,333],"protein":[124,142],"name,":[125],"virus":[126],"subtype,":[127],"isolate,":[128,244],"host,":[129],"geographic":[130],"origin,":[131],"year":[133],"isolation.":[135],"RESULTS:":[136],"Over":[137],"40,000":[138],"annotated":[139],"Influenza":[140],"were":[144,160,179,213],"collected":[145],"combining":[147],"from":[149,154,166,181,224,241],"documents":[153,216,222],"NCBI":[155],"databases.":[157],"Metadata":[158],"values":[159,178,218],"automatically":[161],"extracted,":[162],"aggregated":[163],"reconciled":[165],"several":[167],"document":[168],"fields":[169],"applying":[171],"user-defined":[172],"For":[175],"each":[176,196],"property,":[177],"recovered":[180],">/=88.8%":[182],"accuracy":[186],"exceeding":[187],"96%":[188],"most":[190],"cases.":[191],"Because":[192],"heterogeneity,":[195],"property":[197,266],"required":[198],"up":[199],"six":[201],"different":[202],"rules":[204,232],"be":[206],"combined.":[207],"Significant":[208],"differences":[210],"between":[211,239],"found:":[214],"GenBank":[215],"yield":[217],"reliably":[220,304],"extracted":[223],"GenPept.":[225],"Using":[226],"a":[227,234,254,334],"simple":[228,255,301],"set":[229],"reasoner,":[235],"reconstructed":[237],"relationships":[238],"same":[243],"thus":[245],"identifying":[246],"7640":[247],"isolates.":[248],"Validation":[249],"isolate":[251],"against":[253],"ontology":[256],"highlighted":[257],"400":[260],"inconsistencies,":[261],"leading":[262],"over":[264],"3,000":[265],"value":[267],"corrections.":[268],"CONCLUSION:":[269],"To":[270],"overcome":[271],"inherent":[275],"knowledge":[280,330],"aggregation":[281,331],"embedded":[283],"intelligence":[284],"needed":[286],"large-scale":[288],"analyses.":[289],"Our":[290],"show":[292],"user-controlled":[294],"intuitive":[295],"approaches,":[296],"based":[297],"combination":[299],"rules,":[302],"can":[303],"automate":[305],"various":[306],"tasks,":[308,332],"reducing":[309],"need":[311],"manual":[313],"corrections":[314],"approximately":[316],"5%":[317],"Emerging":[321],"technologies":[323],"possess":[324],"desirable":[325],"features":[326],"support":[328],"today's":[329],"potential":[335],"bring":[337],"immediate":[338],"benefits":[339],"field.":[342]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
