{"id":"https://openalex.org/W2511570169","doi":"https://doi.org/10.18653/v1/w16-3006","title":"A dictionary- and rule-based system for identification of bacteria and habitats in text","display_name":"A dictionary- and rule-based system for identification of bacteria and habitats in text","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2511570169","doi":"https://doi.org/10.18653/v1/w16-3006","mag":"2511570169"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-3006","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-3006","pdf_url":"https://www.aclweb.org/anthology/W16-3006.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th BioNLP Shared Task Workshop","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-3006.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047967178","display_name":"Helen Cook","orcid":"https://orcid.org/0000-0002-2659-3727"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":true,"raw_author_name":"Helen V Cook","raw_affiliation_strings":["University of Copenhagen"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen","institution_ids":["https://openalex.org/I124055696"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025850900","display_name":"Evangelos Pafilis","orcid":"https://orcid.org/0000-0001-5079-0125"},"institutions":[{"id":"https://openalex.org/I4210131924","display_name":"Hellenic Centre for Marine Research","ror":"https://ror.org/038kffh84","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210131924"]},{"id":"https://openalex.org/I2801134892","display_name":"Novo Nordisk Foundation","ror":"https://ror.org/04txyc737","country_code":"DK","type":"facility","lineage":["https://openalex.org/I2801134892"]},{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]},{"id":"https://openalex.org/I2802640032","display_name":"Foundation Center","ror":"https://ror.org/0377f6951","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I2802640032"]}],"countries":["DK","GR","US"],"is_corresponding":false,"raw_author_name":"Evangelos Pafilis","raw_affiliation_strings":["Novo Nordisk Foundation Center for Protein Research,","Faculty of Health and Medical Sciences, University of Copenhagen, Denmark","Institute of Marine Biology,","Biotechnology and Aquaculture, Hellenic Centre for Marine Research, Crete, Greece"],"affiliations":[{"raw_affiliation_string":"Novo Nordisk Foundation Center for Protein Research,","institution_ids":["https://openalex.org/I2801134892","https://openalex.org/I2802640032"]},{"raw_affiliation_string":"Faculty of Health and Medical Sciences, University of Copenhagen, Denmark","institution_ids":["https://openalex.org/I124055696"]},{"raw_affiliation_string":"Institute of Marine Biology,","institution_ids":[]},{"raw_affiliation_string":"Biotechnology and Aquaculture, Hellenic Centre for Marine Research, Crete, Greece","institution_ids":["https://openalex.org/I4210131924"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005772961","display_name":"Lars Juhl Jensen","orcid":"https://orcid.org/0000-0001-7885-715X"},"institutions":[{"id":"https://openalex.org/I124055696","display_name":"University of Copenhagen","ror":"https://ror.org/035b05819","country_code":"DK","type":"education","lineage":["https://openalex.org/I124055696"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Lars Juhl Jensen","raw_affiliation_strings":["University of Copenhagen"],"affiliations":[{"raw_affiliation_string":"University of Copenhagen","institution_ids":["https://openalex.org/I124055696"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5047967178"],"corresponding_institution_ids":["https://openalex.org/I124055696"],"apc_list":null,"apc_paid":null,"fwci":6.1841,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.9651243,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"50","last_page":"55"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9343000054359436,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7691559791564941},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.7242673635482788},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6189360618591309},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6155905723571777},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5154584050178528},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5129106044769287},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.5071582794189453},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5018095970153809},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4916604161262512},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.41576752066612244},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.14329010248184204},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.11310246586799622},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.10142427682876587},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0712461769580841}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7691559791564941},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.7242673635482788},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6189360618591309},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6155905723571777},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5154584050178528},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5129106044769287},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5071582794189453},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5018095970153809},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4916604161262512},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.41576752066612244},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.14329010248184204},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.11310246586799622},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.10142427682876587},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0712461769580841},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w16-3006","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-3006","pdf_url":"https://www.aclweb.org/anthology/W16-3006.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th BioNLP Shared Task Workshop","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-3006","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-3006","pdf_url":"https://www.aclweb.org/anthology/W16-3006.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th BioNLP Shared Task Workshop","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.6800000071525574}],"awards":[{"id":"https://openalex.org/G4282887365","display_name":null,"funder_award_id":"NNF14CC0001","funder_id":"https://openalex.org/F4320322436","funder_display_name":"Novo Nordisk"},{"id":"https://openalex.org/G6662978750","display_name":null,"funder_award_id":"NNF14CC0001","funder_id":"https://openalex.org/F4320325957","funder_display_name":"Novo Nordisk Fonden"},{"id":"https://openalex.org/G7031468586","display_name":null,"funder_award_id":"NNF14CC000","funder_id":"https://openalex.org/F4320325957","funder_display_name":"Novo Nordisk Fonden"},{"id":"https://openalex.org/G996161988","display_name":null,"funder_award_id":"14CC0001","funder_id":"https://openalex.org/F4320325957","funder_display_name":"Novo Nordisk Fonden"}],"funders":[{"id":"https://openalex.org/F4320322436","display_name":"Novo Nordisk","ror":"https://ror.org/0435rc536"},{"id":"https://openalex.org/F4320325957","display_name":"Novo Nordisk Fonden","ror":"https://ror.org/04txyc737"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2511570169.pdf","grobid_xml":"https://content.openalex.org/works/W2511570169.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1495890774","https://openalex.org/W1700570096","https://openalex.org/W1914733988","https://openalex.org/W1976214824","https://openalex.org/W1979609740","https://openalex.org/W1999373531","https://openalex.org/W2024038403","https://openalex.org/W2035909267","https://openalex.org/W2054720302","https://openalex.org/W2071879021","https://openalex.org/W2096525273","https://openalex.org/W2097877306","https://openalex.org/W2119091936","https://openalex.org/W2122513531","https://openalex.org/W2139118683","https://openalex.org/W2146679517","https://openalex.org/W2147459514","https://openalex.org/W2149352713","https://openalex.org/W2162965868","https://openalex.org/W2254409636","https://openalex.org/W2510201597","https://openalex.org/W2516150684","https://openalex.org/W2739999456","https://openalex.org/W2948270586","https://openalex.org/W2950319932","https://openalex.org/W4231041617","https://openalex.org/W4254979833"],"related_works":["https://openalex.org/W2078793151","https://openalex.org/W2916255597","https://openalex.org/W3017222382","https://openalex.org/W3128216712","https://openalex.org/W3136915866","https://openalex.org/W4390279576","https://openalex.org/W2886890203","https://openalex.org/W4313535650","https://openalex.org/W2287770975","https://openalex.org/W2991463832"],"abstract_inverted_index":{"The":[0,114],"number":[1],"of":[2,15,43,130,151,162],"scientific":[3],"papers":[4],"published":[5],"each":[6],"year":[7],"is":[8,21,37,82],"growing":[9],"exponentially":[10],"and":[11,95,97,109,122,136],"given":[12],"the":[13,41,101,106,110,143,147,165],"rate":[14],"this":[16,28,35],"growth,":[17],"automated":[18],"information":[19,26],"extraction":[20],"needed":[22],"to":[23,38,65,90,100,127,154],"efficiently":[24],"extract":[25],"from":[27],"corpus.":[29],"A":[30],"critical":[31],"first":[32],"step":[33],"in":[34,45,69,105,142],"process":[36],"accurately":[39],"recognize":[40],"names":[42,67,94,125],"entities":[44],"text.":[46],"Previous":[47],"efforts,":[48],"such":[49],"as":[50],"SPECIES,":[51],"have":[52,62,73],"identified":[53],"bacteria":[54,92],"strain":[55,93,124],"names,":[56],"among":[57],"other":[58],"taxonomic":[59],"groups,":[60],"but":[61],"been":[63],"limited":[64],"those":[66],"present":[68,141],"NCBI":[70,107],"taxonomy.":[71],"We":[72],"implemented":[74],"a":[75,85,128],"dictionary-based":[76],"named":[77],"entity":[78],"tagger,":[79],"TagIt,":[80],"that":[81,138],"followed":[83],"by":[84],"rule":[86,115],"based":[87,116],"expansion":[88],"system":[89],"identify":[91],"habitats":[96],"resolve":[98],"them":[99],"closest":[102],"match":[103],"possible":[104],"taxonomy":[108],"OntoBiotope":[111],"ontology":[112],"respectively.":[113],"post":[117],"processing":[118],"steps":[119],"expand":[120],"acronyms,":[121],"extend":[123],"according":[126],"set":[129],"rules,":[131],"which":[132],"captures":[133],"additional":[134],"aliases":[135],"strains":[137],"are":[139],"not":[140],"dictionary.":[144],"TagIt":[145],"has":[146],"best":[148],"performance":[149],"out":[150],"three":[152],"entries":[153],"BioNLP-ST":[155],"BB3":[156],"cat+ner,":[157],"with":[158],"an":[159],"overall":[160],"SER":[161],"0.628":[163],"on":[164],"independent":[166],"test":[167],"set.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
