{"id":"https://openalex.org/W2955676798","doi":"https://doi.org/10.18653/v1/w19-5035","title":"Improving Chemical Named Entity Recognition in Patents with Contextualized Word Embeddings","display_name":"Improving Chemical Named Entity Recognition in Patents with Contextualized Word Embeddings","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2955676798","doi":"https://doi.org/10.18653/v1/w19-5035","mag":"2955676798"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-5035","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5035","pdf_url":"https://www.aclweb.org/anthology/W19-5035.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th BioNLP Workshop and Shared Task","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W19-5035.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011798464","display_name":"Zenan Zhai","orcid":"https://orcid.org/0000-0003-1391-6950"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Zenan Zhai","raw_affiliation_strings":["The University of Melbourne, Australia;","University of Melbourne"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Australia;","institution_ids":["https://openalex.org/I165779595"]},{"raw_affiliation_string":"University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063933953","display_name":"Dat Quoc Nguyen","orcid":"https://orcid.org/0000-0001-8214-2878"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Dat Quoc Nguyen","raw_affiliation_strings":["The University of Melbourne, Australia;","University of Melbourne"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Australia;","institution_ids":["https://openalex.org/I165779595"]},{"raw_affiliation_string":"University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023359433","display_name":"Saber A. Akhondi","orcid":"https://orcid.org/0000-0003-2855-5633"},"institutions":[{"id":"https://openalex.org/I1318003438","display_name":"RELX Group (Netherlands)","ror":"https://ror.org/02scfj030","country_code":"NL","type":"company","lineage":["https://openalex.org/I1318003438","https://openalex.org/I4210160603"]},{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU","NL"],"is_corresponding":false,"raw_author_name":"Saber Akhondi","raw_affiliation_strings":["Elsevier","University of Melbourne"],"affiliations":[{"raw_affiliation_string":"Elsevier","institution_ids":["https://openalex.org/I1318003438"]},{"raw_affiliation_string":"University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039568711","display_name":"Camilo Thorne","orcid":"https://orcid.org/0000-0002-6960-772X"},"institutions":[{"id":"https://openalex.org/I1318003438","display_name":"RELX Group (Netherlands)","ror":"https://ror.org/02scfj030","country_code":"NL","type":"company","lineage":["https://openalex.org/I1318003438","https://openalex.org/I4210160603"]},{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU","NL"],"is_corresponding":false,"raw_author_name":"Camilo Thorne","raw_affiliation_strings":["Elsevier","University of Melbourne"],"affiliations":[{"raw_affiliation_string":"Elsevier","institution_ids":["https://openalex.org/I1318003438"]},{"raw_affiliation_string":"University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023810349","display_name":"Christian Druckenbrodt","orcid":"https://orcid.org/0000-0002-3819-6067"},"institutions":[{"id":"https://openalex.org/I1318003438","display_name":"RELX Group (Netherlands)","ror":"https://ror.org/02scfj030","country_code":"NL","type":"company","lineage":["https://openalex.org/I1318003438","https://openalex.org/I4210160603"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Christian Druckenbrodt","raw_affiliation_strings":["Elsevier","Elsevier;"],"affiliations":[{"raw_affiliation_string":"Elsevier","institution_ids":["https://openalex.org/I1318003438"]},{"raw_affiliation_string":"Elsevier;","institution_ids":["https://openalex.org/I1318003438"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078530959","display_name":"Trevor Cohn","orcid":"https://orcid.org/0000-0003-4363-1673"},"institutions":[{"id":"https://openalex.org/I1318003438","display_name":"RELX Group (Netherlands)","ror":"https://ror.org/02scfj030","country_code":"NL","type":"company","lineage":["https://openalex.org/I1318003438","https://openalex.org/I4210160603"]},{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU","NL"],"is_corresponding":false,"raw_author_name":"Trevor Cohn","raw_affiliation_strings":["The University of Melbourne, Australia;","Elsevier;"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Australia;","institution_ids":["https://openalex.org/I165779595"]},{"raw_affiliation_string":"Elsevier;","institution_ids":["https://openalex.org/I1318003438"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063799775","display_name":"Michelle Gregory","orcid":"https://orcid.org/0000-0002-1748-3315"},"institutions":[{"id":"https://openalex.org/I1318003438","display_name":"RELX Group (Netherlands)","ror":"https://ror.org/02scfj030","country_code":"NL","type":"company","lineage":["https://openalex.org/I1318003438","https://openalex.org/I4210160603"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Michelle Gregory","raw_affiliation_strings":["Elsevier","Elsevier;"],"affiliations":[{"raw_affiliation_string":"Elsevier","institution_ids":["https://openalex.org/I1318003438"]},{"raw_affiliation_string":"Elsevier;","institution_ids":["https://openalex.org/I1318003438"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067214173","display_name":"Karin Verspoor","orcid":"https://orcid.org/0000-0002-8661-1544"},"institutions":[{"id":"https://openalex.org/I1318003438","display_name":"RELX Group (Netherlands)","ror":"https://ror.org/02scfj030","country_code":"NL","type":"company","lineage":["https://openalex.org/I1318003438","https://openalex.org/I4210160603"]},{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU","NL"],"is_corresponding":false,"raw_author_name":"Karin Verspoor","raw_affiliation_strings":["The University of Melbourne, Australia;","Elsevier;"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne, Australia;","institution_ids":["https://openalex.org/I165779595"]},{"raw_affiliation_string":"Elsevier;","institution_ids":["https://openalex.org/I1318003438"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5011798464"],"corresponding_institution_ids":["https://openalex.org/I165779595"],"apc_list":null,"apc_paid":null,"fwci":2.15064844,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.8970896,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7405263185501099},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.6998953819274902},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6956866383552551},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6353722214698792},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5327116847038269},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49953651428222656},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.41305282711982727},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.4113156795501709},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.29363706707954407},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1647476851940155},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07373702526092529}],"concepts":[{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7405263185501099},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.6998953819274902},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6956866383552551},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6353722214698792},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5327116847038269},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49953651428222656},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.41305282711982727},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.4113156795501709},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.29363706707954407},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1647476851940155},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07373702526092529},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/w19-5035","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5035","pdf_url":"https://www.aclweb.org/anthology/W19-5035.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th BioNLP Workshop and Shared Task","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1907.02679","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1907.02679","pdf_url":"https://arxiv.org/pdf/1907.02679","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2955676798","is_oa":true,"landing_page_url":"http://arxiv.org/pdf/1907.02679.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1907.02679","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1907.02679","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-5035","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5035","pdf_url":"https://www.aclweb.org/anthology/W19-5035.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th BioNLP Workshop and Shared Task","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2955676798.pdf","grobid_xml":"https://content.openalex.org/works/W2955676798.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1649076462","https://openalex.org/W1891790317","https://openalex.org/W1940872118","https://openalex.org/W2075322787","https://openalex.org/W2080848531","https://openalex.org/W2095596879","https://openalex.org/W2101553882","https://openalex.org/W2121227244","https://openalex.org/W2131774270","https://openalex.org/W2147735450","https://openalex.org/W2147880316","https://openalex.org/W2153579005","https://openalex.org/W2157807817","https://openalex.org/W2169491861","https://openalex.org/W2250539671","https://openalex.org/W2278830581","https://openalex.org/W2296283641","https://openalex.org/W2335791510","https://openalex.org/W2339543475","https://openalex.org/W2346475438","https://openalex.org/W2347081127","https://openalex.org/W2527896214","https://openalex.org/W2540052259","https://openalex.org/W2581661662","https://openalex.org/W2734608416","https://openalex.org/W2738180183","https://openalex.org/W2747592475","https://openalex.org/W2769423117","https://openalex.org/W2787560479","https://openalex.org/W2793978524","https://openalex.org/W2888597024","https://openalex.org/W2904867915","https://openalex.org/W2919115771","https://openalex.org/W2929208326","https://openalex.org/W2963341956","https://openalex.org/W2963940534"],"related_works":["https://openalex.org/W2963341956","https://openalex.org/W2962739339","https://openalex.org/W2884820193","https://openalex.org/W2990896562","https://openalex.org/W2771348687","https://openalex.org/W3158526978","https://openalex.org/W3136058829","https://openalex.org/W2251157338","https://openalex.org/W2970480111","https://openalex.org/W2461583636","https://openalex.org/W2998159147","https://openalex.org/W2577221992","https://openalex.org/W2990536557","https://openalex.org/W2963659646","https://openalex.org/W2974764415","https://openalex.org/W2969068412","https://openalex.org/W3040794994","https://openalex.org/W2922274844","https://openalex.org/W2952935105","https://openalex.org/W2963608876"],"abstract_inverted_index":{"Chemical":[0],"patents":[1,84,124],"are":[2],"an":[3],"important":[4],"resource":[5],"for":[6,56,75],"chemical":[7,11,57,67,77,83,104,123],"information.":[8],"However,":[9],"few":[10],"Named":[12],"Entity":[13],"Recognition":[14],"(NER)":[15],"systems":[16],"have":[17,128],"been":[18],"evaluated":[19],"on":[20,64,79,90,122,132],"patent":[21,68,92],"documents,":[22],"due":[23],"in":[24,82],"part":[25],"to":[26],"their":[27],"structural":[28],"and":[29,51,66,125],"linguistic":[30],"complexity.":[31],"In":[32],"this":[33],"paper,":[34],"we":[35],"explore":[36],"the":[37,76,108],"NER":[38,80,105,133],"performance":[39,81,106],"of":[40,72],"a":[41,129],"BiLSTM-CRF":[42],"model":[43],"utilising":[44],"pre-trained":[45,63],"word":[46,49,54,61,97,119],"embeddings,":[47],"character-level":[48],"representations":[50,55,98],"contextualized":[52,96],"ELMo":[53,101],"patents.":[58],"We":[59,111],"compare":[60],"embeddings":[62,120],"biomedical":[65],"corpora.":[69],"The":[70,88],"effect":[71],"tokenizers":[73,127],"optimized":[74],"domain":[78],"is":[85],"also":[86,112],"explored.":[87],"results":[89],"two":[91],"corpora":[93],"show":[94,113],"that":[95,114],"generated":[99],"from":[100],"substantially":[102],"improve":[103],"w.r.t.":[107],"current":[109],"state-of-the-art.":[110],"domain-specific":[115],"resources":[116],"such":[117],"as":[118],"trained":[121],"chemical-specific":[126],"positive":[130],"impact":[131],"performance.":[134]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
