{"id":"https://openalex.org/W4403025462","doi":"https://doi.org/10.2478/jdis-2024-0029","title":"A comprehensive review of existing corpora and methods for creating annotated corpora for event extraction tasks","display_name":"A comprehensive review of existing corpora and methods for creating annotated corpora for event extraction tasks","publication_year":2024,"publication_date":"2024-10-01","ids":{"openalex":"https://openalex.org/W4403025462","doi":"https://doi.org/10.2478/jdis-2024-0029"},"language":"en","primary_location":{"id":"doi:10.2478/jdis-2024-0029","is_oa":true,"landing_page_url":"https://doi.org/10.2478/jdis-2024-0029","pdf_url":null,"source":{"id":"https://openalex.org/S2764801193","display_name":"Journal of Data and Information Science","issn_l":"2096-157X","issn":["2096-157X","2543-683X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311940","host_organization_name":"Chinese Academy of Sciences","host_organization_lineage":["https://openalex.org/P4310311940"],"host_organization_lineage_names":["Chinese Academy of Sciences"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Science","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.2478/jdis-2024-0029","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108994453","display_name":"Mohd Hafizul Afifi Abdulah","orcid":null},"institutions":[{"id":"https://openalex.org/I203899302","display_name":"Universiti Teknologi Petronas","ror":"https://ror.org/048g2sh07","country_code":"MY","type":"education","lineage":["https://openalex.org/I203899302"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Mohd Hafizul Afifi Abdullah","raw_affiliation_strings":["Centre for Cyber-Physical Systems (C2PS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia"],"raw_orcid":"https://orcid.org/0000-0002-1427-2571","affiliations":[{"raw_affiliation_string":"Centre for Cyber-Physical Systems (C2PS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia","institution_ids":["https://openalex.org/I203899302"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028915945","display_name":"Norshakirah Aziz","orcid":"https://orcid.org/0000-0001-5563-0286"},"institutions":[{"id":"https://openalex.org/I203899302","display_name":"Universiti Teknologi Petronas","ror":"https://ror.org/048g2sh07","country_code":"MY","type":"education","lineage":["https://openalex.org/I203899302"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Norshakirah Aziz","raw_affiliation_strings":["Centre for Cyber-Physical Systems (C2PS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia"],"raw_orcid":"https://orcid.org/0000-0001-5563-0286","affiliations":[{"raw_affiliation_string":"Centre for Cyber-Physical Systems (C2PS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia","institution_ids":["https://openalex.org/I203899302"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026473778","display_name":"Said Jadid Abdulkadir","orcid":"https://orcid.org/0000-0003-0038-3702"},"institutions":[{"id":"https://openalex.org/I203899302","display_name":"Universiti Teknologi Petronas","ror":"https://ror.org/048g2sh07","country_code":"MY","type":"education","lineage":["https://openalex.org/I203899302"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Said Jadid Abdulkadir","raw_affiliation_strings":["Centre for Research in Data Science (CeRDaS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia"],"raw_orcid":"https://orcid.org/0000-0003-0038-3702","affiliations":[{"raw_affiliation_string":"Centre for Research in Data Science (CeRDaS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia","institution_ids":["https://openalex.org/I203899302"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110568900","display_name":"Kashif Hussain","orcid":null},"institutions":[{"id":"https://openalex.org/I156118397","display_name":"Southampton Solent University","ror":"https://ror.org/05xydav19","country_code":"GB","type":"education","lineage":["https://openalex.org/I156118397"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kashif Hussain","raw_affiliation_strings":["Department of Science and Engineering, Solent University , , Southampton , UK"],"raw_orcid":"https://orcid.org/0000-0003-0011-2726","affiliations":[{"raw_affiliation_string":"Department of Science and Engineering, Solent University , , Southampton , UK","institution_ids":["https://openalex.org/I156118397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005161117","display_name":"Hitham Alhussian","orcid":"https://orcid.org/0000-0003-3947-269X"},"institutions":[{"id":"https://openalex.org/I203899302","display_name":"Universiti Teknologi Petronas","ror":"https://ror.org/048g2sh07","country_code":"MY","type":"education","lineage":["https://openalex.org/I203899302"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Hitham Alhussian","raw_affiliation_strings":["Centre for Research in Data Science (CeRDaS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia"],"raw_orcid":"https://orcid.org/0000-0003-3947-269X","affiliations":[{"raw_affiliation_string":"Centre for Research in Data Science (CeRDaS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia","institution_ids":["https://openalex.org/I203899302"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038777959","display_name":"Noureen Talpur","orcid":"https://orcid.org/0000-0003-4162-7910"},"institutions":[{"id":"https://openalex.org/I203899302","display_name":"Universiti Teknologi Petronas","ror":"https://ror.org/048g2sh07","country_code":"MY","type":"education","lineage":["https://openalex.org/I203899302"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Noureen Talpur","raw_affiliation_strings":["Centre for Research in Data Science (CeRDaS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia"],"raw_orcid":"https://orcid.org/0000-0003-4162-7910","affiliations":[{"raw_affiliation_string":"Centre for Research in Data Science (CeRDaS), Universiti Teknologi PETRONAS , Seri Iskandar , Malaysia","institution_ids":["https://openalex.org/I203899302"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5108994453"],"corresponding_institution_ids":["https://openalex.org/I203899302"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14107974,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"9","issue":"4","first_page":"196","last_page":"238"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7391286492347717},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6230286359786987},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5579847693443298},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5298601984977722},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.475314199924469},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46362069249153137}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7391286492347717},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6230286359786987},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5579847693443298},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5298601984977722},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.475314199924469},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46362069249153137},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.2478/jdis-2024-0029","is_oa":true,"landing_page_url":"https://doi.org/10.2478/jdis-2024-0029","pdf_url":null,"source":{"id":"https://openalex.org/S2764801193","display_name":"Journal of Data and Information Science","issn_l":"2096-157X","issn":["2096-157X","2543-683X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311940","host_organization_name":"Chinese Academy of Sciences","host_organization_lineage":["https://openalex.org/P4310311940"],"host_organization_lineage_names":["Chinese Academy of Sciences"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Science","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire/39e5dd2f-ed69-4ff7-9a82-36f60adffd0e","is_oa":true,"landing_page_url":"https://pure.solent.ac.uk/en/publications/39e5dd2f-ed69-4ff7-9a82-36f60adffd0e","pdf_url":null,"source":{"id":"https://openalex.org/S4306402589","display_name":"Solent University Research Portal (Solent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I156118397","host_organization_name":"Southampton Solent University","host_organization_lineage":["https://openalex.org/I156118397"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Abdullah, M H A, Aziz, N, Abdulkadir, S J, Hussain, K, Alhussian, H & Talpur, N 2024, 'A comprehensive review of existing corpora and methods for creating annotated corpora for event extraction tasks', Journal of Data and Information Science, vol. 9, no. 4, pp. 196-238. https://doi.org/10.2478/jdis-2024-0029","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:da0b237916ea40ac954a326756205742","is_oa":true,"landing_page_url":"https://doaj.org/article/da0b237916ea40ac954a326756205742","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Data and Information Science, Vol 9, Iss 4, Pp 196-238 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.2478/jdis-2024-0029","is_oa":true,"landing_page_url":"https://doi.org/10.2478/jdis-2024-0029","pdf_url":null,"source":{"id":"https://openalex.org/S2764801193","display_name":"Journal of Data and Information Science","issn_l":"2096-157X","issn":["2096-157X","2543-683X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311940","host_organization_name":"Chinese Academy of Sciences","host_organization_lineage":["https://openalex.org/P4310311940"],"host_organization_lineage_names":["Chinese Academy of Sciences"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data and Information Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W8550301","https://openalex.org/W55204438","https://openalex.org/W1630427015","https://openalex.org/W1830777114","https://openalex.org/W1870686808","https://openalex.org/W2027979924","https://openalex.org/W2071038402","https://openalex.org/W2098722636","https://openalex.org/W2122496897","https://openalex.org/W2135411671","https://openalex.org/W2149803936","https://openalex.org/W2163107094","https://openalex.org/W2179494026","https://openalex.org/W2518463060","https://openalex.org/W2745665559","https://openalex.org/W2799862684","https://openalex.org/W2803884531","https://openalex.org/W2891553865","https://openalex.org/W2970684294","https://openalex.org/W2981089724","https://openalex.org/W2982149480","https://openalex.org/W2996500226","https://openalex.org/W3035229828","https://openalex.org/W3039924652","https://openalex.org/W3040977741","https://openalex.org/W3092149213","https://openalex.org/W3106098584","https://openalex.org/W3174691968","https://openalex.org/W4205754567","https://openalex.org/W4207058071","https://openalex.org/W4285195854","https://openalex.org/W4297893466","https://openalex.org/W4309674289","https://openalex.org/W4311188592","https://openalex.org/W4318830868","https://openalex.org/W4320015776","https://openalex.org/W4321351832","https://openalex.org/W4323650985","https://openalex.org/W4367051110","https://openalex.org/W4367369752","https://openalex.org/W4385067052","https://openalex.org/W4385572845","https://openalex.org/W4385572893","https://openalex.org/W4386102168","https://openalex.org/W4388691793","https://openalex.org/W4389523857","https://openalex.org/W4391709253","https://openalex.org/W4393212800","https://openalex.org/W4400695050","https://openalex.org/W4404783774","https://openalex.org/W6606243021","https://openalex.org/W6677293857","https://openalex.org/W6678152709","https://openalex.org/W6802937455","https://openalex.org/W6927916266","https://openalex.org/W6982521223","https://openalex.org/W7015753947"],"related_works":["https://openalex.org/W2377297411","https://openalex.org/W3148217948","https://openalex.org/W2375788636","https://openalex.org/W2358561207","https://openalex.org/W2388704129","https://openalex.org/W2392827053","https://openalex.org/W2975617233","https://openalex.org/W2377877252","https://openalex.org/W2362914816","https://openalex.org/W2368651715"],"abstract_inverted_index":{"Abstract":[0],"Purpose":[1],"The":[2],"purpose":[3],"of":[4,14,54,96,116,134,158,178,292],"this":[5,55,203,228,261],"study":[6,21,75,204,216,229,262,305],"is":[7,85],"to":[8,23,50,80,107,176,185,199,239,252,288],"serve":[9,268],"as":[10,171,242,269],"a":[11,132,231,243,270],"comprehensive":[12,284],"review":[13,20],"the":[15,27,43,51,70,114,117,143,150,156,206,235,290,310],"existing":[16,28,44,97,311],"annotated":[17,29,63,135,159,312],"corpora.":[18,111,281],"This":[19,74,112,304],"aims":[22],"provide":[24],"information":[25,308],"on":[26,149,219,234,309],"corpora":[30,100,136,145,160,166,180,208,221,237,313],"for":[31,39,60,69,87,105,123,137,209,273,286,296,314],"event":[32,45,88,98,138,210,297,315],"extraction,":[33],"which":[34],"are":[35,146,163],"limited":[36,183],"but":[37],"essential":[38],"training":[40,124],"and":[41,65,101,120,152,168,225,275,318],"improving":[42],"extraction":[46,89,99,139,211,298,316],"algorithms.":[47,127],"In":[48,141],"addition":[49],"primary":[52],"goal":[53],"study,":[56],"it":[57,248],"provides":[58,102,263,283],"guidelines":[59,104,285],"preparing":[61,274],"an":[62,77,93],"corpus":[64,83,119,293],"suggests":[66],"suitable":[67,86],"tools":[68],"annotation":[71],"task.":[72],"Design/methodology/approach":[73],"employs":[76],"analytical":[78],"approach":[79],"examine":[81],"available":[82,167,207,222],"that":[84,260,266],"tasks.":[90,140,212],"It":[91,282],"offers":[92],"in-depth":[94],"analysis":[95],"systematic":[103],"researchers":[106,287],"develop":[108],"accurate,":[109],"high-quality":[110,165],"ensures":[113],"reliability":[115],"created":[118],"its":[121,240],"suitability":[122],"machine":[125],"learning":[126],"Findings":[128],"Our":[129,215],"exploration":[130],"reveals":[131],"scarcity":[133],"particular,":[142],"English":[144,224,236],"mainly":[147],"focused":[148],"biomedical":[151],"general":[153],"domains.":[154,302],"Despite":[155],"issue":[157],"scarcity,":[161],"there":[162],"several":[164],"widely":[169,249],"used":[170],"benchmark":[172],"datasets.":[173],"However,":[174],"access":[175],"some":[177],"these":[179],"might":[181],"be":[182],"owing":[184,198],"closed-access":[186],"policies":[187],"or":[188],"discontinued":[189],"maintenance":[190],"after":[191],"being":[192],"initially":[193],"released,":[194],"rendering":[195],"them":[196],"inaccessible":[197],"broken":[200],"links.":[201],"Therefore,":[202],"documents":[205],"Research":[213],"limitations":[214],"focuses":[217],"only":[218],"well-known":[220],"in":[223],"Chinese.":[226],"Nevertheless,":[227],"places":[230],"strong":[232],"emphasis":[233],"due":[238],"status":[241],"global":[244],"lingua":[245],"franca,":[246],"making":[247],"understood":[250],"compared":[251],"other":[253],"languages.":[254],"Practical":[255],"implications":[256],"We":[257],"genuinely":[258],"believe":[259],"valuable":[264],"knowledge":[265],"can":[267],"guiding":[271],"framework":[272],"accurately":[276],"annotating":[277],"events":[278],"from":[279],"text":[280],"improve":[289],"quality":[291],"annotations,":[294],"especially":[295],"tasks":[299,317],"across":[300],"various":[301],"Originality/value":[303],"comprehensively":[306],"compiled":[307],"provided":[319],"preparation":[320],"guidelines.":[321]},"counts_by_year":[],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2024-10-02T00:00:00"}
