{"id":"https://openalex.org/W2900954802","doi":"https://doi.org/10.3390/data3040053","title":"Towards the Construction of a Gold Standard Biomedical Corpus for the Romanian Language","display_name":"Towards the Construction of a Gold Standard Biomedical Corpus for the Romanian Language","publication_year":2018,"publication_date":"2018-11-23","ids":{"openalex":"https://openalex.org/W2900954802","doi":"https://doi.org/10.3390/data3040053","mag":"2900954802"},"language":"en","primary_location":{"id":"doi:10.3390/data3040053","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data3040053","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/data3040053","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044680920","display_name":"Maria Mitrofan","orcid":"https://orcid.org/0000-0001-7466-2013"},"institutions":[{"id":"https://openalex.org/I58077936","display_name":"Romanian Academy","ror":"https://ror.org/0561n6946","country_code":"RO","type":"archive","lineage":["https://openalex.org/I58077936"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Maria Mitrofan","raw_affiliation_strings":["Romanian Academy Research Institute for Artificial Intelligence, 13 Calea 13 Septembrie, Bucharest 050711, Romania"],"affiliations":[{"raw_affiliation_string":"Romanian Academy Research Institute for Artificial Intelligence, 13 Calea 13 Septembrie, Bucharest 050711, Romania","institution_ids":["https://openalex.org/I58077936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006567976","display_name":"Verginica Barbu Mititelu","orcid":"https://orcid.org/0000-0003-1945-2587"},"institutions":[{"id":"https://openalex.org/I58077936","display_name":"Romanian Academy","ror":"https://ror.org/0561n6946","country_code":"RO","type":"archive","lineage":["https://openalex.org/I58077936"]}],"countries":["RO"],"is_corresponding":true,"raw_author_name":"Verginica Barbu Mititelu","raw_affiliation_strings":["Romanian Academy Research Institute for Artificial Intelligence, 13 Calea 13 Septembrie, Bucharest 050711, Romania"],"affiliations":[{"raw_affiliation_string":"Romanian Academy Research Institute for Artificial Intelligence, 13 Calea 13 Septembrie, Bucharest 050711, Romania","institution_ids":["https://openalex.org/I58077936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017663924","display_name":"Grigorina Mitrofan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Grigorina Mitrofan","raw_affiliation_strings":["National Institute of Diabetes and Metabolic Diseases \u201cN.C. Paulescu\u201d, 5-7 Ion Movil\u0103 Street, Bucharest 020475, Romania"],"affiliations":[{"raw_affiliation_string":"National Institute of Diabetes and Metabolic Diseases \u201cN.C. Paulescu\u201d, 5-7 Ion Movil\u0103 Street, Bucharest 020475, Romania","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006567976","https://openalex.org/A5017663924","https://openalex.org/A5044680920"],"corresponding_institution_ids":["https://openalex.org/I58077936"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.2792,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.58362007,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"3","issue":"4","first_page":"53","last_page":"53"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/romanian","display_name":"Romanian","score":0.9024530649185181},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7882570624351501},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.7665937542915344},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7626849412918091},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6851149797439575},{"id":"https://openalex.org/keywords/gold-standard","display_name":"Gold standard (test)","score":0.6184413433074951},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5711603164672852},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.4867289364337921},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4168367385864258},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3916018009185791},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2644188404083252},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.11721569299697876},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08810830116271973},{"id":"https://openalex.org/keywords/radiology","display_name":"Radiology","score":0.0749131441116333},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.07475340366363525}],"concepts":[{"id":"https://openalex.org/C129400051","wikidata":"https://www.wikidata.org/wiki/Q7913","display_name":"Romanian","level":2,"score":0.9024530649185181},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7882570624351501},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.7665937542915344},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7626849412918091},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6851149797439575},{"id":"https://openalex.org/C40993552","wikidata":"https://www.wikidata.org/wiki/Q514654","display_name":"Gold standard (test)","level":2,"score":0.6184413433074951},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5711603164672852},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.4867289364337921},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4168367385864258},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3916018009185791},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2644188404083252},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.11721569299697876},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08810830116271973},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0749131441116333},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.07475340366363525},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/data3040053","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data3040053","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:gam:jdataj:v:3:y:2018:i:4:p:53-:d:185030","is_oa":false,"landing_page_url":"https://www.mdpi.com/2306-5729/3/4/53/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:9f847878789b4fbe8add10ccb15c1d9c","is_oa":true,"landing_page_url":"https://doaj.org/article/9f847878789b4fbe8add10ccb15c1d9c","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 3, Iss 4, p 53 (2018)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2306-5729/3/4/53/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/data3040053","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/data3040053","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data3040053","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W8499301","https://openalex.org/W1502112145","https://openalex.org/W1528859321","https://openalex.org/W1652702807","https://openalex.org/W1850865022","https://openalex.org/W2008830554","https://openalex.org/W2020278455","https://openalex.org/W2031420872","https://openalex.org/W2072291547","https://openalex.org/W2149369282","https://openalex.org/W2153804780","https://openalex.org/W2162442688","https://openalex.org/W2169099542","https://openalex.org/W2178441628","https://openalex.org/W2389836769","https://openalex.org/W2626761403","https://openalex.org/W2757729335","https://openalex.org/W2776920218","https://openalex.org/W2805211535","https://openalex.org/W2805957826","https://openalex.org/W2806387542","https://openalex.org/W6602139079","https://openalex.org/W6602788395","https://openalex.org/W6682780409","https://openalex.org/W6691205811","https://openalex.org/W6752043516","https://openalex.org/W6763877760"],"related_works":["https://openalex.org/W650530393","https://openalex.org/W3205433727","https://openalex.org/W2765761072","https://openalex.org/W3205728902","https://openalex.org/W2805241642","https://openalex.org/W2756816606","https://openalex.org/W4299443742","https://openalex.org/W2361861616","https://openalex.org/W20298607","https://openalex.org/W4377865234"],"abstract_inverted_index":{"Gold":[0],"standard":[1,81],"corpora":[2],"(GSCs)":[3],"are":[4,31],"essential":[5],"for":[6,40,82,144,188],"the":[7,24,59,62,71,117,155,166,183,189],"supervised":[8],"training":[9],"and":[10,65,87,104,110,136,138,153,177],"evaluation":[11],"of":[12,23,61,70,151,157,165,185],"systems":[13],"that":[14],"perform":[15],"natural":[16],"language":[17,50,73],"processing":[18],"(NLP)":[19],"tasks.":[20],"Currently,":[21],"most":[22],"resources":[25,51],"used":[26,123,180],"in":[27,33,98],"biomedical":[28,68,83,88],"NLP":[29,186],"tasks":[30],"mainly":[32],"English.":[34],"Little":[35],"effort":[36],"has":[37,129,168],"been":[38,169],"reported":[39],"other":[41],"languages":[42],"including":[43],"Romanian":[44,72,125,190],"and,":[45],"thus,":[46],"access":[47],"to":[48,76,114,181],"such":[49],"is":[52,174],"poor.":[53],"In":[54,112],"this":[55],"paper,":[56],"we":[57,122],"present":[58],"construction":[60],"first":[63],"morphologically":[64],"terminologically":[66],"annotated":[67,143],"corpus":[69,118,167,173],"(MoNERo),":[74],"meant":[75],"serve":[77],"as":[78],"a":[79,124,147],"gold":[80],"part-of-speech":[84],"(POS)":[85],"tagging":[86],"named":[89,160],"entity":[90],"recognition":[91],"(bioNER).":[92],"It":[93],"contains":[94],"14,012":[95],"tokens":[96],"distributed":[97],"three":[99],"medical":[100,159],"subdomains:":[101],"cardiology,":[102],"diabetes":[103],"endocrinology,":[105],"extracted":[106],"from":[107],"books,":[108],"journals":[109],"blogposts.":[111],"order":[113],"automatically":[115],"annotate":[116],"with":[119,146],"POS":[120],"tags,":[121],"tag":[126],"set":[127],"which":[128],"715":[130],"labels,":[131],"while":[132],"diseases,":[133],"anatomy,":[134],"procedures":[135],"chemicals":[137],"drugs":[139],"labels":[140],"were":[141],"manually":[142,170],"bioNER":[145],"Cohen":[148],"Kappa":[149],"coefficient":[150],"92.8%":[152],"revealed":[154],"occurrence":[156],"1877":[158],"entities.":[161],"The":[162,172],"automatic":[163],"annotation":[164],"checked.":[171],"publicly":[175],"available":[176],"can":[178],"be":[179],"facilitate":[182],"development":[184],"algorithms":[187],"language.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
