{"id":"https://openalex.org/W4405686831","doi":"https://doi.org/10.1093/bib/bbae652","title":"Annotating publicly-available samples and studies using interpretable modeling of unstructured metadata","display_name":"Annotating publicly-available samples and studies using interpretable modeling of unstructured metadata","publication_year":2024,"publication_date":"2024-11-22","ids":{"openalex":"https://openalex.org/W4405686831","doi":"https://doi.org/10.1093/bib/bbae652","pmid":"https://pubmed.ncbi.nlm.nih.gov/39710433"},"language":"en","primary_location":{"id":"doi:10.1093/bib/bbae652","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbae652","pdf_url":"https://academic.oup.com/bib/article-pdf/26/1/bbae652/61254046/bbae652.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://academic.oup.com/bib/article-pdf/26/1/bbae652/61254046/bbae652.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100619852","display_name":"Hao Yuan","orcid":"https://orcid.org/0000-0002-8848-1595"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hao Yuan","raw_affiliation_strings":["Ecology, Evolution, and Behavior Program, Michigan State University , East Lansing, MI 48823 ,","Genetics and Genome Sciences Program, Michigan State University , East Lansing, MI 48823 ,"],"affiliations":[{"raw_affiliation_string":"Ecology, Evolution, and Behavior Program, Michigan State University , East Lansing, MI 48823 ,","institution_ids":["https://openalex.org/I87216513"]},{"raw_affiliation_string":"Genetics and Genome Sciences Program, Michigan State University , East Lansing, MI 48823 ,","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031413580","display_name":"Parker Hicks","orcid":"https://orcid.org/0000-0002-8102-5458"},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parker Hicks","raw_affiliation_strings":["Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,","institution_ids":["https://openalex.org/I51713134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074813904","display_name":"Mansooreh Ahmadian","orcid":"https://orcid.org/0000-0002-5020-3979"},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mansooreh Ahmadian","raw_affiliation_strings":["Department of Biostatistics and Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,"],"affiliations":[{"raw_affiliation_string":"Department of Biostatistics and Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,","institution_ids":["https://openalex.org/I51713134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041275039","display_name":"Kayla A Johnson","orcid":"https://orcid.org/0000-0002-0889-5705"},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kayla A Johnson","raw_affiliation_strings":["Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,","institution_ids":["https://openalex.org/I51713134"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106898910","display_name":"Lydia Valtadoros","orcid":null},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lydia Valtadoros","raw_affiliation_strings":["Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,","institution_ids":["https://openalex.org/I51713134"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053907733","display_name":"Arjun Krishnan","orcid":"https://orcid.org/0000-0002-7980-4110"},"institutions":[{"id":"https://openalex.org/I51713134","display_name":"University of Colorado Anschutz Medical Campus","ror":"https://ror.org/03wmf1y16","country_code":"US","type":"education","lineage":["https://openalex.org/I51713134"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arjun Krishnan","raw_affiliation_strings":["Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,"],"affiliations":[{"raw_affiliation_string":"Department of Biomedical Informatics, University of Colorado Anschutz Medical Campus , Aurora, CO 80045 ,","institution_ids":["https://openalex.org/I51713134"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100619852"],"corresponding_institution_ids":["https://openalex.org/I87216513"],"apc_list":{"value":4011,"currency":"USD","value_usd":4011},"apc_paid":{"value":4011,"currency":"USD","value_usd":4011},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20750064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"26","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8591644763946533},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8310018181800842},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7514759302139282},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.6331028342247009},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5986733436584473},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.5680152177810669},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.562296450138092},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5100491642951965},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.48105835914611816},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4604770243167877},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4317907691001892},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4290293753147125},{"id":"https://openalex.org/keywords/controlled-vocabulary","display_name":"Controlled vocabulary","score":0.4221954345703125},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.25987663865089417},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.24534398317337036},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.10573112964630127},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0875069797039032}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8591644763946533},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8310018181800842},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7514759302139282},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.6331028342247009},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5986733436584473},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.5680152177810669},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.562296450138092},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5100491642951965},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.48105835914611816},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4604770243167877},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4317907691001892},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4290293753147125},{"id":"https://openalex.org/C110615152","wikidata":"https://www.wikidata.org/wiki/Q1469824","display_name":"Controlled vocabulary","level":2,"score":0.4221954345703125},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25987663865089417},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.24534398317337036},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.10573112964630127},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0875069797039032},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000071253","descriptor_name":"Metadata","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D018875","descriptor_name":"Vocabulary, Controlled","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D018875","descriptor_name":"Vocabulary, Controlled","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D018875","descriptor_name":"Vocabulary, Controlled","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D018875","descriptor_name":"Vocabulary, Controlled","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D040901","descriptor_name":"Proteomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D040901","descriptor_name":"Proteomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D040901","descriptor_name":"Proteomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D040901","descriptor_name":"Proteomics","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1093/bib/bbae652","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbae652","pdf_url":"https://academic.oup.com/bib/article-pdf/26/1/bbae652/61254046/bbae652.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},{"id":"pmid:39710433","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/39710433","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in bioinformatics","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:11663484","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/11663484","pdf_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11663484/pdf/bbae652.pdf","source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Brief Bioinform","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1093/bib/bbae652","is_oa":true,"landing_page_url":"https://doi.org/10.1093/bib/bbae652","pdf_url":"https://academic.oup.com/bib/article-pdf/26/1/bbae652/61254046/bbae652.pdf","source":{"id":"https://openalex.org/S91767247","display_name":"Briefings in Bioinformatics","issn_l":"1467-5463","issn":["1467-5463","1477-4054"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Briefings in Bioinformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4826041763","display_name":null,"funder_award_id":"2328140","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4405686831.pdf","grobid_xml":"https://content.openalex.org/works/W4405686831.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W1981849943","https://openalex.org/W2098824882","https://openalex.org/W2116034137","https://openalex.org/W2116041602","https://openalex.org/W2122402213","https://openalex.org/W2122825543","https://openalex.org/W2137632714","https://openalex.org/W2302501749","https://openalex.org/W2334617819","https://openalex.org/W2341128841","https://openalex.org/W2398811468","https://openalex.org/W2479132886","https://openalex.org/W2524066112","https://openalex.org/W2525778437","https://openalex.org/W2907094739","https://openalex.org/W2913195599","https://openalex.org/W2949612967","https://openalex.org/W3006139292","https://openalex.org/W3046375318","https://openalex.org/W3085139254","https://openalex.org/W3113049731","https://openalex.org/W3130317325","https://openalex.org/W3133324619","https://openalex.org/W3208691473","https://openalex.org/W3217671076","https://openalex.org/W4210909814","https://openalex.org/W4220967417","https://openalex.org/W4224037213","https://openalex.org/W4226146505","https://openalex.org/W4280531620","https://openalex.org/W4281717669","https://openalex.org/W4294221637","https://openalex.org/W4308572714","https://openalex.org/W4312516176","https://openalex.org/W4385456320","https://openalex.org/W4385573017","https://openalex.org/W4391913563","https://openalex.org/W6606688646","https://openalex.org/W6608310390","https://openalex.org/W6727690538","https://openalex.org/W6802712164"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W2888392564","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W4390569940","https://openalex.org/W4361193272","https://openalex.org/W2963326959","https://openalex.org/W2118717649","https://openalex.org/W4388685194","https://openalex.org/W4378220270"],"abstract_inverted_index":{"Reusing":[0],"massive":[1],"collections":[2],"of":[3,32,62,133,140,168],"publicly":[4],"available":[5,179],"biomedical":[6,56,165],"data":[7],"can":[8,163],"significantly":[9],"impact":[10],"knowledge":[11],"discovery.":[12],"However,":[13],"these":[14],"public":[15],"samples":[16],"and":[17,29,51,64,90,120,155,175],"studies":[18,149],"are":[19,98,178],"typically":[20],"described":[21],"using":[22,153],"unstructured":[23,57],"plain":[24],"text,":[25,128],"hindering":[26],"the":[27,33,68,126,131,138],"findability":[28],"further":[30],"reuse":[31],"data.":[34],"To":[35],"combat":[36],"this":[37,79],"problem,":[38],"we":[39],"propose":[40],"txt2onto":[41,141],"2.0,":[42],"a":[43,105],"general-purpose":[44],"method":[45],"based":[46],"on":[47],"natural":[48],"language":[49,107],"processing":[50],"machine":[52],"learning":[53],"for":[54,148],"annotating":[55],"metadata":[58],"to":[59,67,111,117],"controlled":[60],"vocabularies":[61],"diseases":[63],"tissues.":[65],"Compared":[66],"previous":[69],"version":[70,81],"(txt2onto":[71],"1.0),":[72],"which":[73],"uses":[74,82,102],"numerical":[75],"embeddings":[76,103],"as":[77,84,158],"features,":[78,85],"new":[80],"words":[83,115],"resulting":[86],"in":[87],"improved":[88],"interpretability":[89],"performance,":[91],"especially":[92],"when":[93],"few":[94],"positive":[95],"training":[96],"instances":[97],"available.":[99],"Txt2onto":[100],"2.0":[101,142],"from":[104,125,150],"large":[106],"model":[108],"during":[109],"prediction":[110],"deal":[112],"with":[113],"unseen-yet-relevant":[114],"related":[116],"each":[118],"disease":[119,146],"tissue":[121],"term":[122],"being":[123],"predicted":[124],"input":[127],"thereby":[129],"explaining":[130],"basis":[132],"every":[134],"annotation.":[135],"We":[136],"demonstrate":[137],"generalizability":[139],"by":[143],"accurately":[144],"predicting":[145],"annotations":[147],"independent":[151],"datasets,":[152],"proteomics":[154],"clinical":[156],"trials":[157],"examples.":[159],"Overall,":[160],"our":[161],"approach":[162],"annotate":[164],"text":[166],"regardless":[167],"experimental":[169],"types":[170],"or":[171],"sources.":[172],"Code,":[173],"data,":[174],"trained":[176],"models":[177],"at":[180],"https://github.com/krishnanlab/txt2onto2.0.":[181]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
