{"id":"https://openalex.org/W3159409148","doi":"https://doi.org/10.1093/database/baab021","title":"Increasing metadata coverage of SRA BioSample entries using deep learning\u2013based named entity recognition","display_name":"Increasing metadata coverage of SRA BioSample entries using deep learning\u2013based named entity recognition","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3159409148","doi":"https://doi.org/10.1093/database/baab021","mag":"3159409148","pmid":"https://pubmed.ncbi.nlm.nih.gov/33914028"},"language":"en","primary_location":{"id":"doi:10.1093/database/baab021","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/baab021","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/baab021/37578643/baab021.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/baab021/37578643/baab021.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035688269","display_name":"Adam Klie","orcid":"https://orcid.org/0000-0002-7600-3086"},"institutions":[{"id":"https://openalex.org/I4210132395","display_name":"University of California San Diego Medical Center","ror":"https://ror.org/03aw5sn18","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2800935791","https://openalex.org/I4210132395"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Klie","raw_affiliation_strings":["Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA"],"affiliations":[{"raw_affiliation_string":"Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I4210132395"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088713741","display_name":"Brian Tsui","orcid":"https://orcid.org/0000-0001-8017-5895"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]},{"id":"https://openalex.org/I4210132395","display_name":"University of California San Diego Medical Center","ror":"https://ror.org/03aw5sn18","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2800935791","https://openalex.org/I4210132395"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Y Tsui","raw_affiliation_strings":["Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA"],"affiliations":[{"raw_affiliation_string":"Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I4210132395"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031990922","display_name":"Shamim Mollah","orcid":"https://orcid.org/0000-0001-9178-8339"},"institutions":[{"id":"https://openalex.org/I204465549","display_name":"Washington University in St. Louis","ror":"https://ror.org/01yc7t268","country_code":"US","type":"education","lineage":["https://openalex.org/I204465549"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shamim Mollah","raw_affiliation_strings":["Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","Department of Bioengineering, University of California San Diego, La Jolla, CA 92093, USA","Department of Genetics, Washington University in St. Louis, St. Louis, MO 63130, USA"],"affiliations":[{"raw_affiliation_string":"Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Bioengineering, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Genetics, Washington University in St. Louis, St. Louis, MO 63130, USA","institution_ids":["https://openalex.org/I204465549"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077225646","display_name":"Dylan Skola","orcid":"https://orcid.org/0000-0002-3655-1643"},"institutions":[{"id":"https://openalex.org/I4210132395","display_name":"University of California San Diego Medical Center","ror":"https://ror.org/03aw5sn18","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2800935791","https://openalex.org/I4210132395"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dylan Skola","raw_affiliation_strings":["Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA"],"affiliations":[{"raw_affiliation_string":"Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I4210132395"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113167133","display_name":"Michelle T. Dow","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132395","display_name":"University of California San Diego Medical Center","ror":"https://ror.org/03aw5sn18","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2800935791","https://openalex.org/I4210132395"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michelle Dow","raw_affiliation_strings":["Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA"],"affiliations":[{"raw_affiliation_string":"Bioinformatics and Systems Biology Program, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I4210132395"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056615015","display_name":"Chun-Nan Hsu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132395","display_name":"University of California San Diego Medical Center","ror":"https://ror.org/03aw5sn18","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2800935791","https://openalex.org/I4210132395"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chun-Nan Hsu","raw_affiliation_strings":["Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","Department of Neurosciences, University of California San Diego, La Jolla, CA 92093, USA"],"affiliations":[{"raw_affiliation_string":"Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I4210132395"]},{"raw_affiliation_string":"Department of Neurosciences, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101461760","display_name":"Hannah Carter","orcid":"https://orcid.org/0000-0002-1729-2463"},"institutions":[{"id":"https://openalex.org/I4210132395","display_name":"University of California San Diego Medical Center","ror":"https://ror.org/03aw5sn18","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I2800935791","https://openalex.org/I4210132395"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hannah Carter","raw_affiliation_strings":["Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA"],"affiliations":[{"raw_affiliation_string":"Department of Medicine, Division of Medical Genetics, University of California San Diego, La Jolla, CA 92093, USA","institution_ids":["https://openalex.org/I4210132395"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5056615015","https://openalex.org/A5101461760"],"corresponding_institution_ids":["https://openalex.org/I36258959","https://openalex.org/I4210132395"],"apc_list":{"value":1415,"currency":"GBP","value_usd":1735},"apc_paid":{"value":1415,"currency":"GBP","value_usd":1735},"fwci":1.2173,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.78516243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"2021","issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8597664833068848},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7537095546722412},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.571549654006958},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5460699200630188},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44140610098838806},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.435380756855011},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38758188486099243},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.360524445772171}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8597664833068848},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7537095546722412},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.571549654006958},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5460699200630188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44140610098838806},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.435380756855011},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38758188486099243},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.360524445772171},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000077321","descriptor_name":"Deep Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015203","descriptor_name":"Reproducibility of Results","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":5,"locations":[{"id":"doi:10.1093/database/baab021","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/baab021","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/baab021/37578643/baab021.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},{"id":"pmid:33914028","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33914028","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database : the journal of biological databases and curation","raw_type":null},{"id":"pmh:oai:digitalcommons.wustl.edu:oa_4-1937","is_oa":true,"landing_page_url":"https://digitalcommons.wustl.edu/oa_4/941","pdf_url":null,"source":{"id":"https://openalex.org/S4306400764","display_name":"Digital Commons@Becker (Washington University School of Medicine)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I204465549","host_organization_name":"Washington University in St. Louis","host_organization_lineage":["https://openalex.org/I204465549"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2020-Current year OA Pubs","raw_type":"text"},{"id":"pmh:oai:escholarship.org:ark:/13030/qt2kh5w1xz","is_oa":true,"landing_page_url":"https://escholarship.org/uc/item/2kh5w1xz","pdf_url":null,"source":{"id":"https://openalex.org/S4306400115","display_name":"eScholarship (California Digital Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801248553","host_organization_name":"California Digital Library","host_organization_lineage":["https://openalex.org/I2801248553"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Database, vol 2021","raw_type":"article"},{"id":"pmh:oai:pubmedcentral.nih.gov:8083811","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8083811","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Database (Oxford)","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1093/database/baab021","is_oa":true,"landing_page_url":"https://doi.org/10.1093/database/baab021","pdf_url":"https://academic.oup.com/database/article-pdf/doi/10.1093/database/baab021/37578643/baab021.pdf","source":{"id":"https://openalex.org/S4210201630","display_name":"Database","issn_l":"1758-0463","issn":["1758-0463"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311647","host_organization_name":"University of Oxford","host_organization_lineage":["https://openalex.org/P4310311647"],"host_organization_lineage_names":["University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Database","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1304534115","display_name":null,"funder_award_id":"T32GM8806","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G5289894485","display_name":null,"funder_award_id":"FL-000655","funder_id":"https://openalex.org/F4320309949","funder_display_name":"Canadian Institute for Advanced Research"}],"funders":[{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W3159409148.pdf"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1964550430","https://openalex.org/W2020541351","https://openalex.org/W2036288429","https://openalex.org/W2066453373","https://openalex.org/W2079735306","https://openalex.org/W2116041602","https://openalex.org/W2153579005","https://openalex.org/W2302501749","https://openalex.org/W2515248967","https://openalex.org/W2606715885","https://openalex.org/W2756231919","https://openalex.org/W2768618120","https://openalex.org/W2773787581","https://openalex.org/W2883686584","https://openalex.org/W2907094739","https://openalex.org/W2924308178","https://openalex.org/W2949612967","https://openalex.org/W2950750198","https://openalex.org/W2953067954","https://openalex.org/W2964163406","https://openalex.org/W3100689701","https://openalex.org/W4294170691","https://openalex.org/W6682691769","https://openalex.org/W6725658358","https://openalex.org/W6755207826"],"related_works":["https://openalex.org/W2058118494","https://openalex.org/W2392768766","https://openalex.org/W2382021449","https://openalex.org/W3017222382","https://openalex.org/W3128216712","https://openalex.org/W3136915866","https://openalex.org/W4390279576","https://openalex.org/W2886890203","https://openalex.org/W4313535650","https://openalex.org/W2287770975"],"abstract_inverted_index":{"High-quality":[0],"metadata":[1,42,52,77,96,124,176,191,206],"annotations":[2,177],"for":[3,12,16,33,167,189,196,214],"data":[4],"hosted":[5],"in":[6,29,178,208],"large":[7],"public":[8],"repositories":[9],"are":[10,40],"essential":[11],"research":[13],"reproducibility":[14],"and":[15,20,98,103,113,155,163,193],"conducting":[17],"fast,":[18],"powerful":[19],"scalable":[21],"meta-analyses.":[22],"Currently,":[23],"a":[24,69,136],"majority":[25],"of":[26,54,111,131,138,165,185],"sequencing":[27],"samples":[28,139],"the":[30,51,106,127,174,183,194,200,212],"National":[31],"Center":[32],"Biotechnology":[34],"Information's":[35],"Sequence":[36],"Read":[37],"Archive":[38],"(SRA)":[39],"missing":[41,76],"across":[43],"several":[44],"categories.":[45],"In":[46],"an":[47,100],"effort":[48],"to":[49,67,88,94,121,204],"improve":[50],"coverage":[53,207],"these":[55],"samples,":[56,132],"we":[57],"leveraged":[58],"almost":[59],"44":[60],"million":[61],"attribute-value":[62],"pairs":[63],"from":[64,126,141,158],"SRA":[65],"BioSample":[66,209],"train":[68],"scalable,":[70],"recurrent":[71,186],"neural":[72,187],"network":[73,84],"that":[74],"predicts":[75],"via":[78],"named":[79],"entity":[80],"recognition":[81],"(NER).":[82],"The":[83],"was":[85],"first":[86],"trained":[87],"classify":[89],"short":[90],"text":[91],"phrases":[92],"according":[93],"11":[95,123],"categories":[97,125,169],"achieved":[99],"overall":[101],"accuracy":[102],"area":[104],"under":[105],"receiver":[107],"operating":[108],"characteristic":[109],"curve":[110],"85.2%":[112],"0.977,":[114],"respectively.":[115],"We":[116],"then":[117],"applied":[118],"our":[119],"classifier":[120],"predict":[122],"longer":[128],"TITLE":[129],"attribute":[130],"evaluating":[133],"performance":[134],"on":[135],"set":[137],"withheld":[140],"model":[142],"training.":[143],"Prediction":[144],"accuracies":[145,162],"were":[146],"high":[147],"when":[148],"extracting":[149],"sample":[150],"Genus/Species":[151],"(94.85%),":[152],"Condition/Disease":[153],"(95.65%)":[154],"Strain":[156],"(82.03%)":[157],"TITLEs,":[159],"with":[160,173],"lower":[161],"lack":[164],"predictions":[166],"other":[168],"highlighting":[170],"multiple":[171],"issues":[172],"current":[175],"BioSample.":[179],"These":[180],"results":[181],"indicate":[182],"utility":[184],"networks":[188],"NER-based":[190],"prediction":[192],"potential":[195],"models":[197],"such":[198],"as":[199],"one":[201],"presented":[202],"here":[203],"increase":[205],"while":[210],"minimizing":[211],"need":[213],"manual":[215],"curation.":[216],"Database":[217],"URL:":[218],"https://github.com/cartercompbio/PredictMEE.":[219]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
