{"id":"https://openalex.org/W6949416734","doi":"https://doi.org/10.5281/zenodo.14900544","title":"BioToFlow: a corpus annotated with bioinformatics workflows information","display_name":"BioToFlow: a corpus annotated with bioinformatics workflows information","publication_year":2025,"publication_date":"2025-02-19","ids":{"openalex":"https://openalex.org/W6949416734","doi":"https://doi.org/10.5281/zenodo.14900544"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.14900544","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14900544","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.14900544","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Sebe, Cl\u00e9mence","orcid":"https://orcid.org/0000-0003-1988-1875"},"institutions":[{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Sebe, Cl\u00e9mence","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay, CNRS, LISN, 91400, Orsay, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay, CNRS, LISN, 91400, Orsay, France","institution_ids":["https://openalex.org/I277688954","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cohen-Boulakia, Sarah","orcid":"https://orcid.org/0000-0002-7439-1441"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Cohen-Boulakia, Sarah","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay, CNRS, LISN, 91400, Orsay, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay, CNRS, LISN, 91400, Orsay, France","institution_ids":["https://openalex.org/I277688954","https://openalex.org/I1294671590"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ferret, Olivier","orcid":"https://orcid.org/0000-0003-0755-2361"},"institutions":[{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I4210085861","display_name":"Laboratoire d'Int\u00e9gration des Syst\u00e8mes et des Technologies","ror":"https://ror.org/000dbcc61","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131","https://openalex.org/I2738703131","https://openalex.org/I277688954","https://openalex.org/I4210085861","https://openalex.org/I4210117989"]},{"id":"https://openalex.org/I2738703131","display_name":"Commissariat \u00e0 l'\u00c9nergie Atomique et aux \u00c9nergies Alternatives","ror":"https://ror.org/00jjx8s55","country_code":"FR","type":"government","lineage":["https://openalex.org/I2738703131"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ferret, Olivier","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay, CEA, List, F-91120, Palaiseau, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay, CEA, List, F-91120, Palaiseau, France","institution_ids":["https://openalex.org/I277688954","https://openalex.org/I2738703131","https://openalex.org/I4210085861"]}]},{"author_position":"last","author":{"id":null,"display_name":"N\u00e9v\u00e9ol, Aur\u00e9lie","orcid":"https://orcid.org/0000-0002-1846-9144"},"institutions":[{"id":"https://openalex.org/I277688954","display_name":"Universit\u00e9 Paris-Saclay","ror":"https://ror.org/03xjwb503","country_code":"FR","type":"education","lineage":["https://openalex.org/I277688954"]},{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"N\u00e9v\u00e9ol, Aur\u00e9lie","raw_affiliation_strings":["Universit\u00e9 Paris-Saclay, CNRS, LISN, 91400, Orsay, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 Paris-Saclay, CNRS, LISN, 91400, Orsay, France","institution_ids":["https://openalex.org/I277688954","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I277688954","https://openalex.org/I1294671590"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7874000072479248},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.7572000026702881},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4781000018119812},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.41929998993873596},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.35989999771118164},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.3158999979496002}],"concepts":[{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7874000072479248},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.7572000026702881},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7149999737739563},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5716999769210815},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5468000173568726},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4781000018119812},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42419999837875366},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.41929998993873596},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.35989999771118164},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3531999886035919},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2856999933719635}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.14900544","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14900544","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.14900544","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.14900544","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"BioToFlow":[0,87],"is":[1,26],"a":[2,47,108,147,250],"corpus":[3,25,149,306],"describing":[4],"bioinformatics":[5,114],"workflows":[6,36,115,278],"in":[7,14,88,107,156],"English":[8],"publications.":[9],"These":[10],"annotations":[11],"are":[12,61,237],"available":[13],"the":[15,69,72,77,229,254,301,305,317,320,334,339],"BRAT":[16],"Rapid":[17],"Annotation":[18],"Tool":[19],"(BRAT)":[20],"standoff":[21],"format":[22],"(https://brat.nlplab.org/standoff.html).":[23],"This":[24,329],"composed":[27],"of":[28,49,153,232,259,304,319,322],"52":[29],"articles":[30,32,60,70,81,178,236,309],"(26":[31],"related":[33],"to":[34,122,345],"Nextflow":[35],"and":[37,76,130,150,241,261,313],"26":[38],"on":[39,113,253],"Snakemake":[40],"workflows,":[41],"randomly":[42],"selected":[43],"from":[44,333],"PubMed)":[45],"with":[46,79,132,239,247,249,307,311,343],"total":[48],"78":[50],"419":[51],"tokens":[52],"27":[53],"786":[54],"annotated":[55,238,310],"tokens.":[56],"Repository":[57],"organisation":[58],"The":[59,234],"separated":[62],"into":[63],"two":[64],"directories:":[65],"one":[66],"containing":[67],"all":[68],"for":[71,82],"training":[73],"phases":[74],"(39)":[75],"other":[78],"13":[80],"test.":[83],"Papers":[84],"Please":[85],"cite":[86],"any":[89],"research":[90],"that":[91],"uses":[92],"or":[93],"extends":[94],"it":[95],":":[96],"Sebe,":[97,160,326],"C.,":[98,265],"Cohen-Boulakia,":[99,162],"S.,":[100],"Ferret,":[101,164],"O.,":[102],"N\u00e9v\u00e9ol,":[103],"A.:":[104],"Extracting":[105],"information":[106],"low-resource":[109],"setting:":[110],"Case":[111],"study":[112],"(2024),":[116],"https://arxiv.org/abs/2411.19295":[117],"In":[118,222,294],"this":[119,223,295],"article":[120,224,296],"accepted":[121],"IDA":[123],"2025":[124],"(in":[125,225,297],"English),":[126],"we":[127,144,227,299],"present":[128,228,300],"*BioToFlow*":[129],"experiments":[131,246],"few":[133],"shot":[134],"named":[135],"entity":[136],"recognition":[137],"(NER)":[138],"using":[139],"an":[140],"autoregressive":[141],"language":[142],"model,":[143],"also":[145],"use":[146],"pre-existing":[148],"test":[151],"integration":[152],"workflow":[154],"knowledge":[155],"NER":[157,248],"models.":[158],"Cl\u00e9mence":[159,325],"Sarah":[161],"Olivier":[163],"Aur\u00e9lie":[165],"N\u00e9v\u00e9ol.":[166],"Extraction":[167,274,286],"d\u2019entit\u00e9s":[168],"nomm\u00e9es":[169],"d\u00e9crivant":[170],"des":[171,177,196,203,212,289],"cha\u00eenes":[172],"de":[173,282],"traitement":[174],"bioinformatiques":[175],"dans":[176],"scientifiques":[179,279],"en":[180,206],"anglais.":[181],"35emes":[182],"Journ\u00e9es":[183],"d\u2019\u00c9tudes":[184],"sur":[185,192,276],"la":[186,283],"Parole":[187],"(JEP":[188],"2024)":[189,200],"31eme":[190],"Conf\u00e9rence":[191],"le":[193,209],"Traitement":[194,210],"Automatique":[195,211],"Langues":[197,213],"Naturelles":[198],"(TALN":[199],"26eme":[201],"Rencontre":[202],"\u00c9tudiants":[204],"Chercheurs":[205],"Informatique":[207],"pour":[208],"(RECITAL":[214],"2024),":[215],"Jul":[216],"2024,":[217],"Toulouse,":[218],"France.":[219],"pp.422-434.":[220],"hal-04623033.":[221],"French),":[226,298],"second":[230],"version":[231,303],"*BioToFlow*.":[233],"new":[235],"entities":[240,312],"attributes.":[242],"We":[243,315],"conduct":[244],"preliminary":[245],"specific":[251],"focus":[252],"memorization":[255],"vs.":[256],"generalization":[257],"abilities":[258],"statistical":[260],"rule-based":[262],"methods.":[263],"Sebe":[264],"N\u00e9v\u00e9ol":[266],"A.,":[267],"Cohen-Boulakia":[268],"S.":[269],"&":[270],"Gaignard":[271],"A.":[272],"(2023).":[273],"d\u2019informations":[275],"les":[277],"\u00e0":[280],"partir":[281],"litt\u00e9rature.":[284],"volume":[285],"et":[287],"Gestion":[288],"Connaissances,":[290],"RNTI-E-39,":[291],"p.":[292],"313.":[293],"first":[302],"24":[308],"relations.":[314],"show":[316],"feasibility":[318],"task":[321],"NER.":[323],"Contact":[324],"clemence.sebe@universite-paris-saclay.fr":[327],"Funding":[328],"work":[330],"received":[331],"support":[332],"National":[335],"Research":[336],"Agency":[337],"under":[338],"France":[340],"2030":[341],"program,":[342],"reference":[344],"ANR-22-PESN-0007.":[346]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
