{"id":"https://openalex.org/W4283773284","doi":"https://doi.org/10.48550/arxiv.2206.15076","title":"BigBIO: A Framework for Data-Centric Biomedical Natural Language Processing","display_name":"BigBIO: A Framework for Data-Centric Biomedical Natural Language Processing","publication_year":2022,"publication_date":"2022-06-30","ids":{"openalex":"https://openalex.org/W4283773284","doi":"https://doi.org/10.48550/arxiv.2206.15076"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2206.15076","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.15076","pdf_url":"https://arxiv.org/pdf/2206.15076","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2206.15076","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028277225","display_name":"Jason Fries","orcid":"https://orcid.org/0000-0001-9316-5768"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Fries, Jason Alan","raw_affiliation_strings":["Stanford University (450 Serra Mall, Stanford, CA 94305-2004 - United States)"],"affiliations":[{"raw_affiliation_string":"Stanford University (450 Serra Mall, Stanford, CA 94305-2004 - United States)","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041680406","display_name":"Leon Weber","orcid":"https://orcid.org/0000-0002-2499-472X"},"institutions":[{"id":"https://openalex.org/I205582932","display_name":"Max Delbr\u00fcck Center","ror":"https://ror.org/04p5ggc03","country_code":"DE","type":"facility","lineage":["https://openalex.org/I1305996414","https://openalex.org/I205582932"]},{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Weber, Leon","raw_affiliation_strings":["MDC - Max Delbr\u00fcck Center for Molecular Medicine [Berlin] (Robert-R\u00f6ssle-Stra\u00dfe 10, 13125 Berlin, Allemagne - Germany)","HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"MDC - Max Delbr\u00fcck Center for Molecular Medicine [Berlin] (Robert-R\u00f6ssle-Stra\u00dfe 10, 13125 Berlin, Allemagne - Germany)","institution_ids":["https://openalex.org/I205582932"]},{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080416690","display_name":"Natasha Seelam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seelam, Natasha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077093430","display_name":"Gabriel Altay","orcid":"https://orcid.org/0000-0002-4120-2907"},"institutions":[{"id":"https://openalex.org/I4210114883","display_name":"Tempus Labs (United States)","ror":"https://ror.org/01gbymr57","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114883"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Altay, Gabriel","raw_affiliation_strings":["Tempus Labs Inc. (Chicago, IL - United States)"],"affiliations":[{"raw_affiliation_string":"Tempus Labs Inc. (Chicago, IL - United States)","institution_ids":["https://openalex.org/I4210114883"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103139732","display_name":"Debajyoti Datta","orcid":"https://orcid.org/0000-0003-0581-6116"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Datta, Debajyoti","raw_affiliation_strings":["University of Virginia (P.O. Box 400229, Charlottesville, VA 22904-4229 - United States)"],"affiliations":[{"raw_affiliation_string":"University of Virginia (P.O. Box 400229, Charlottesville, VA 22904-4229 - United States)","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003047299","display_name":"Samuele Garda","orcid":"https://orcid.org/0009-0002-8234-8299"},"institutions":[{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Garda, Samuele","raw_affiliation_strings":["HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110709672","display_name":"Myungsun Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I4391767868","display_name":"Immuneering (United States)","ror":"https://ror.org/00awq8a10","country_code":null,"type":"company","lineage":["https://openalex.org/I4391767868"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kang, Myungsun","raw_affiliation_strings":["Immuneering Corporation (Boston, Massachusetts, 02215, USA - United States)"],"affiliations":[{"raw_affiliation_string":"Immuneering Corporation (Boston, Massachusetts, 02215, USA - United States)","institution_ids":["https://openalex.org/I4391767868"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045036841","display_name":"Ruisi Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Ruisi","raw_affiliation_strings":["BigScience  Project (United States)"],"affiliations":[{"raw_affiliation_string":"BigScience  Project (United States)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016087544","display_name":"Wojciech Kusa","orcid":"https://orcid.org/0000-0003-4420-4147"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kusa, Wojciech","raw_affiliation_strings":["Brown University (Providence, Rhode Island 02912 - United States)"],"affiliations":[{"raw_affiliation_string":"Brown University (Providence, Rhode Island 02912 - United States)","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084836777","display_name":"Samuel Cahyawijaya","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Cahyawijaya, Samuel","raw_affiliation_strings":["HKUST - The Hong Kong University of Science and Technology (Hong Kong SAR China)"],"affiliations":[{"raw_affiliation_string":"HKUST - The Hong Kong University of Science and Technology (Hong Kong SAR China)","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039156783","display_name":"Fabio Barth","orcid":null},"institutions":[{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Barth, Fabio","raw_affiliation_strings":["HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015625692","display_name":"Simon Ott","orcid":"https://orcid.org/0000-0002-9415-3969"},"institutions":[{"id":"https://openalex.org/I129774422","display_name":"University of Vienna","ror":"https://ror.org/03prydq77","country_code":"AT","type":"education","lineage":["https://openalex.org/I129774422"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Ott, Simon","raw_affiliation_strings":["Universit\u00e4t Wien = University of Vienna (Universit\u00e4tsring 1, 1010 Wien - Austria)"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Wien = University of Vienna (Universit\u00e4tsring 1, 1010 Wien - Austria)","institution_ids":["https://openalex.org/I129774422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082184364","display_name":"Matthias Samwald","orcid":"https://orcid.org/0000-0002-4855-2571"},"institutions":[{"id":"https://openalex.org/I129774422","display_name":"University of Vienna","ror":"https://ror.org/03prydq77","country_code":"AT","type":"education","lineage":["https://openalex.org/I129774422"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Samwald, Matthias","raw_affiliation_strings":["Universit\u00e4t Wien = University of Vienna (Universit\u00e4tsring 1, 1010 Wien - Austria)"],"affiliations":[{"raw_affiliation_string":"Universit\u00e4t Wien = University of Vienna (Universit\u00e4tsring 1, 1010 Wien - Austria)","institution_ids":["https://openalex.org/I129774422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091126154","display_name":"Stephen Bach","orcid":"https://orcid.org/0000-0003-3359-2594"},"institutions":[{"id":"https://openalex.org/I27804330","display_name":"Brown University","ror":"https://ror.org/05gq02987","country_code":"US","type":"education","lineage":["https://openalex.org/I27804330"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bach, Stephen","raw_affiliation_strings":["Brown University (Providence, Rhode Island 02912 - United States)"],"affiliations":[{"raw_affiliation_string":"Brown University (Providence, Rhode Island 02912 - United States)","institution_ids":["https://openalex.org/I27804330"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076733732","display_name":"Stella Biderman","orcid":"https://orcid.org/0000-0001-8228-1042"},"institutions":[{"id":"https://openalex.org/I1322124587","display_name":"Booz Allen Hamilton (United States)","ror":"https://ror.org/051rcp357","country_code":"US","type":"company","lineage":["https://openalex.org/I1322124587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Biderman, Stella","raw_affiliation_strings":["EleutherAI (United States)","Booz Allen Hamilton (McLean, Virginia - United States)"],"affiliations":[{"raw_affiliation_string":"EleutherAI (United States)","institution_ids":[]},{"raw_affiliation_string":"Booz Allen Hamilton (McLean, Virginia - United States)","institution_ids":["https://openalex.org/I1322124587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017484078","display_name":"Mario S\u00e4nger","orcid":"https://orcid.org/0000-0002-2950-2587"},"institutions":[{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"S\u00e4nger, Mario","raw_affiliation_strings":["HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100407999","display_name":"Bo Wang","orcid":"https://orcid.org/0000-0001-7158-7046"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062800749","display_name":"Alison Callahan","orcid":"https://orcid.org/0000-0001-5163-380X"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Callahan, Alison","raw_affiliation_strings":["Stanford University (450 Serra Mall, Stanford, CA 94305-2004 - United States)"],"affiliations":[{"raw_affiliation_string":"Stanford University (450 Serra Mall, Stanford, CA 94305-2004 - United States)","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063930098","display_name":"Daniel Le\u00f3n Peri\u00f1\u00e1n","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Peri\u00f1\u00e1n, Daniel Le\u00f3n","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090317088","display_name":"Th\u00e9o Gigant","orcid":"https://orcid.org/0009-0003-6392-8519"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gigant, Th\u00e9o","raw_affiliation_strings":["BigScience  Project (United States)"],"affiliations":[{"raw_affiliation_string":"BigScience  Project (United States)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002464117","display_name":"Patrick Haller","orcid":"https://orcid.org/0000-0002-8968-7587"},"institutions":[{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Haller, Patrick","raw_affiliation_strings":["HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053958581","display_name":"Jenny Chim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chim, Jenny","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002420931","display_name":"Jose David Posada","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Posada, Jose David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018424619","display_name":"John Michael Giorgi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giorgi, John Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046572271","display_name":"Karthik Rangasai Sivaraman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sivaraman, Karthik Rangasai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031105903","display_name":"Marc P\u00e0mies","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"P\u00e0mies, Marc","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059744135","display_name":"Marianna Nezhurina","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nezhurina, Marianna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013867946","display_name":"Robert Martin","orcid":"https://orcid.org/0000-0002-6119-764X"},"institutions":[{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Martin, Robert","raw_affiliation_strings":["HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073907662","display_name":"Michael Cullan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cullan, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080275765","display_name":"Moritz Freidank","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Freidank, Moritz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011910444","display_name":"Nathan Dahlberg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dahlberg, Nathan","raw_affiliation_strings":["BigScience  Project (United States)"],"affiliations":[{"raw_affiliation_string":"BigScience  Project (United States)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021358528","display_name":"Shubhanshu Mishra","orcid":"https://orcid.org/0000-0001-9931-1690"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mishra, Shubhanshu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110946701","display_name":"Shamik Bose","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bose, Shamik","raw_affiliation_strings":["BigScience  Project (United States)"],"affiliations":[{"raw_affiliation_string":"BigScience  Project (United States)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013217979","display_name":"Nicholas Michio Broad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Broad, Nicholas Michio","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018424518","display_name":"Yanis Labrak","orcid":"https://orcid.org/0000-0003-1072-3862"},"institutions":[{"id":"https://openalex.org/I4210119991","display_name":"Laboratoire Informatique d'Avignon","ror":"https://ror.org/02n399288","country_code":"FR","type":"facility","lineage":["https://openalex.org/I198415970","https://openalex.org/I4210119991"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Labrak, Yanis","raw_affiliation_strings":["LIA - Laboratoire Informatique d'Avignon (339 Chemin des Meinajaries Agroparc BP 1228 84911 Avignon cedex 9 - France)"],"affiliations":[{"raw_affiliation_string":"LIA - Laboratoire Informatique d'Avignon (339 Chemin des Meinajaries Agroparc BP 1228 84911 Avignon cedex 9 - France)","institution_ids":["https://openalex.org/I4210119991"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055953100","display_name":"Shlok S Deshmukh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deshmukh, Shlok S","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041835883","display_name":"Sid Kiblawi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kiblawi, Sid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034271698","display_name":"Ayush Singh","orcid":"https://orcid.org/0000-0002-4737-3424"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Singh, Ayush","raw_affiliation_strings":["BigScience  Project (United States)"],"affiliations":[{"raw_affiliation_string":"BigScience  Project (United States)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000991839","display_name":"Minh Chien Vu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vu, Minh Chien","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048796501","display_name":"Trishala Neeraj","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Neeraj, Trishala","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078848582","display_name":"Jonas Golde","orcid":"https://orcid.org/0000-0002-8160-3000"},"institutions":[{"id":"https://openalex.org/I39343248","display_name":"Humboldt-Universit\u00e4t zu Berlin","ror":"https://ror.org/01hcx6992","country_code":"DE","type":"education","lineage":["https://openalex.org/I39343248"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Golde, Jonas","raw_affiliation_strings":["HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)"],"affiliations":[{"raw_affiliation_string":"HU Berlin - Humboldt-Universit\u00e4t zu Berlin = Humboldt University of Berlin = Universit\u00e9 Humboldt de Berlin (Humboldt-Universit\u00e4t zu Berlin \u2013 Unter den Linden 6 \u2013 10099 Berlin \u2013 Bundesrepublik Deutschland - Germany)","institution_ids":["https://openalex.org/I39343248"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021727268","display_name":"A. Villanova del Moral","orcid":"https://orcid.org/0000-0003-1727-1045"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"del Moral, Albert Villanova","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5023358367","display_name":"Benjamin Beilharz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beilharz, Benjamin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":5,"institutions_distinct_count":43,"corresponding_author_ids":["https://openalex.org/A5028277225"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":12,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9646000266075134,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8366956114768982},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.734957218170166},{"id":"https://openalex.org/keywords/digital-curation","display_name":"Digital curation","score":0.5310426354408264},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5199438333511353},{"id":"https://openalex.org/keywords/data-curation","display_name":"Data curation","score":0.48766911029815674},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48657286167144775},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.453841894865036},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.4508707523345947},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.42768919467926025},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.37956365942955017},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.21514150500297546}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8366956114768982},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.734957218170166},{"id":"https://openalex.org/C2775953033","wikidata":"https://www.wikidata.org/wiki/Q5276060","display_name":"Digital curation","level":2,"score":0.5310426354408264},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5199438333511353},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.48766911029815674},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48657286167144775},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.453841894865036},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4508707523345947},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.42768919467926025},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.37956365942955017},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.21514150500297546},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2206.15076","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.15076","pdf_url":"https://arxiv.org/pdf/2206.15076","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:HAL:hal-03723519v1","is_oa":false,"landing_page_url":"https://hal.science/hal-03723519","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Neural Information Processing Systems (NeurIPS), Nov 2022, New Orleans, United States. &#x27E8;10.48550/arXiv.2206.15076&#x27E9;","raw_type":"Conference papers"},{"id":"doi:10.48550/arxiv.2206.15076","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2206.15076","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2206.15076","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2206.15076","pdf_url":"https://arxiv.org/pdf/2206.15076","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":138,"referenced_works":["https://openalex.org/W8550301","https://openalex.org/W39864132","https://openalex.org/W77347155","https://openalex.org/W97427318","https://openalex.org/W142474614","https://openalex.org/W154351976","https://openalex.org/W1034374084","https://openalex.org/W1482328859","https://openalex.org/W1532254950","https://openalex.org/W1610821757","https://openalex.org/W1630427015","https://openalex.org/W1672757658","https://openalex.org/W1747666062","https://openalex.org/W1757434866","https://openalex.org/W1791866474","https://openalex.org/W1830777114","https://openalex.org/W1850865022","https://openalex.org/W1859283887","https://openalex.org/W1981208470","https://openalex.org/W2005058680","https://openalex.org/W2028102159","https://openalex.org/W2032566933","https://openalex.org/W2047782770","https://openalex.org/W2048140075","https://openalex.org/W2052217781","https://openalex.org/W2084377579","https://openalex.org/W2097960255","https://openalex.org/W2098722636","https://openalex.org/W2100627415","https://openalex.org/W2102514878","https://openalex.org/W2104148262","https://openalex.org/W2109206523","https://openalex.org/W2114039834","https://openalex.org/W2120754707","https://openalex.org/W2126276057","https://openalex.org/W2131285447","https://openalex.org/W2131546905","https://openalex.org/W2132724073","https://openalex.org/W2146408445","https://openalex.org/W2149369282","https://openalex.org/W2160987310","https://openalex.org/W2162442688","https://openalex.org/W2168041406","https://openalex.org/W2169099542","https://openalex.org/W2169974160","https://openalex.org/W2170189740","https://openalex.org/W2171374484","https://openalex.org/W2174775663","https://openalex.org/W2179494026","https://openalex.org/W2250249423","https://openalex.org/W2250469303","https://openalex.org/W2250765544","https://openalex.org/W2414451196","https://openalex.org/W2509884321","https://openalex.org/W2735784619","https://openalex.org/W2740815822","https://openalex.org/W2743028754","https://openalex.org/W2752636837","https://openalex.org/W2761123249","https://openalex.org/W2785446179","https://openalex.org/W2788496822","https://openalex.org/W2794085611","https://openalex.org/W2807245502","https://openalex.org/W2907279416","https://openalex.org/W2908510526","https://openalex.org/W2946690328","https://openalex.org/W2949894546","https://openalex.org/W2950161719","https://openalex.org/W2953126493","https://openalex.org/W2962815673","https://openalex.org/W2963123047","https://openalex.org/W2963866616","https://openalex.org/W2964179635","https://openalex.org/W2970482702","https://openalex.org/W2970511757","https://openalex.org/W2970846466","https://openalex.org/W2970986790","https://openalex.org/W2970998014","https://openalex.org/W2971258845","https://openalex.org/W2975249097","https://openalex.org/W2979250794","https://openalex.org/W2983315135","https://openalex.org/W2985294119","https://openalex.org/W3013036898","https://openalex.org/W3013647094","https://openalex.org/W3016164449","https://openalex.org/W3034999214","https://openalex.org/W3037234229","https://openalex.org/W3037869352","https://openalex.org/W3046375318","https://openalex.org/W3081304278","https://openalex.org/W3094737233","https://openalex.org/W3095525213","https://openalex.org/W3096590546","https://openalex.org/W3099977667","https://openalex.org/W3101757358","https://openalex.org/W3102569150","https://openalex.org/W3115937632","https://openalex.org/W3118813946","https://openalex.org/W3124687886","https://openalex.org/W3125468681","https://openalex.org/W3130583616","https://openalex.org/W3137481621","https://openalex.org/W3154151289","https://openalex.org/W3155312918","https://openalex.org/W3162922479","https://openalex.org/W3168090480","https://openalex.org/W3169653581","https://openalex.org/W3177173791","https://openalex.org/W3177765786","https://openalex.org/W3193378959","https://openalex.org/W3197708183","https://openalex.org/W3197876970","https://openalex.org/W3198156935","https://openalex.org/W3205068155","https://openalex.org/W3212464620","https://openalex.org/W3212496002","https://openalex.org/W4221153690","https://openalex.org/W4223443147","https://openalex.org/W4224275713","https://openalex.org/W4226265641","https://openalex.org/W4254522893","https://openalex.org/W4281644150","https://openalex.org/W4283167130","https://openalex.org/W4285178342","https://openalex.org/W4285185841","https://openalex.org/W4286987939","https://openalex.org/W4287631150","https://openalex.org/W4287692594","https://openalex.org/W4287890934","https://openalex.org/W4288089799","https://openalex.org/W4288359139","https://openalex.org/W4289360595","https://openalex.org/W4293227627","https://openalex.org/W4297234531","https://openalex.org/W4365511667","https://openalex.org/W4393797247","https://openalex.org/W4393920955"],"related_works":["https://openalex.org/W2400390830","https://openalex.org/W2598248896","https://openalex.org/W2907605431","https://openalex.org/W842882028","https://openalex.org/W2138555252","https://openalex.org/W2807479512","https://openalex.org/W2277259138","https://openalex.org/W3135458263","https://openalex.org/W4231012597","https://openalex.org/W3200944274"],"abstract_inverted_index":{"Training":[0],"and":[1,93,106,109,118,137,148,158],"evaluating":[2],"language":[3,20,58,122],"models":[4],"increasingly":[5],"requires":[6],"the":[7,42],"construction":[8],"of":[9,13,37,44,83,145],"meta-datasets":[10],"--diverse":[11],"collections":[12],"curated":[14],"data":[15,71,133],"with":[16,112],"clear":[17],"provenance.":[18],"Natural":[19],"prompting":[21],"has":[22],"recently":[23],"lead":[24],"to":[25,56,104],"improved":[26],"zero-shot":[27,143],"generalization":[28],"by":[29],"transforming":[30],"existing,":[31],"supervised":[32],"datasets":[33,65,105],"into":[34],"a":[35,80],"diversity":[36],"novel":[38],"pretraining":[39],"tasks,":[40],"highlighting":[41],"benefits":[43],"meta-dataset":[45,99],"curation.":[46],"While":[47],"successful":[48],"in":[49,69],"general-domain":[50],"text,":[51],"translating":[52],"these":[53],"data-centric":[54],"approaches":[55],"biomedical":[57,64,85,146],"modeling":[59],"remains":[60],"challenging,":[61],"as":[62],"labeled":[63],"are":[66],"significantly":[67],"underrepresented":[68],"popular":[70],"hubs.":[72],"To":[73],"address":[74],"this":[75],"challenge,":[76],"we":[77],"introduce":[78],"BigBIO":[79,96,152],"community":[81,156],"library":[82],"126+":[84],"NLP":[86],"datasets,":[87],"currently":[88],"covering":[89],"12":[90],"task":[91,130],"categories":[92],"10+":[94],"languages.":[95],"facilitates":[97],"reproducible":[98],"curation":[100],"via":[101],"programmatic":[102],"access":[103],"their":[107],"metadata,":[108],"is":[110,153,159],"compatible":[111],"current":[113],"platforms":[114],"for":[115,129],"prompt":[116],"engineering":[117],"end-to-end":[119],"few/zero":[120],"shot":[121],"model":[123],"evaluation.":[124],"We":[125],"discuss":[126],"our":[127],"process":[128],"schema":[131],"harmonization,":[132],"auditing,":[134],"contribution":[135],"guidelines,":[136],"outline":[138],"two":[139],"illustrative":[140],"use":[141],"cases:":[142],"evaluation":[144],"prompts":[147],"large-scale,":[149],"multi-task":[150],"learning.":[151],"an":[154],"ongoing":[155],"effort":[157],"available":[160],"at":[161],"https://github.com/bigscience-workshop/biomedical":[162]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
