{"id":"https://openalex.org/W4214535912","doi":"https://doi.org/10.1021/acs.jcim.1c01199","title":"Single Model for Organic and Inorganic Chemical Named Entity Recognition in ChemDataExtractor","display_name":"Single Model for Organic and Inorganic Chemical Named Entity Recognition in ChemDataExtractor","publication_year":2022,"publication_date":"2022-02-24","ids":{"openalex":"https://openalex.org/W4214535912","doi":"https://doi.org/10.1021/acs.jcim.1c01199","pmid":"https://pubmed.ncbi.nlm.nih.gov/35199519"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.1c01199","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c01199","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.1c01199","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.1c01199","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084387381","display_name":"Taketomo Isazawa","orcid":"https://orcid.org/0000-0003-0475-403X"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Taketomo Isazawa","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K"],"raw_orcid":"https://orcid.org/0000-0003-0475-403X","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068607578","display_name":"Jacqueline M. Cole","orcid":"https://orcid.org/0000-0002-1552-8743"},"institutions":[{"id":"https://openalex.org/I1286704778","display_name":"Rutherford Appleton Laboratory","ror":"https://ror.org/03gq8fr08","country_code":"GB","type":"facility","lineage":["https://openalex.org/I1286704778","https://openalex.org/I162524378","https://openalex.org/I4210087105"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jacqueline M. Cole","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0FS, U.K","ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K"],"raw_orcid":"https://orcid.org/0000-0002-1552-8743","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0FS, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K","institution_ids":["https://openalex.org/I1286704778"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5068607578"],"corresponding_institution_ids":["https://openalex.org/I1286704778","https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":6.2459,"has_fulltext":false,"cited_by_count":45,"citation_normalized_percentile":{"value":0.96866538,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"62","issue":"5","first_page":"1207","last_page":"1213"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.8523537516593933},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7577948570251465},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6494196653366089},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.6174114942550659},{"id":"https://openalex.org/keywords/named-entity","display_name":"Named entity","score":0.5567978024482727},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4870603382587433},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.44905921816825867},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4430049955844879},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4306719899177551},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3485832214355469},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14292415976524353},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.13524222373962402},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1309954822063446},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09324052929878235}],"concepts":[{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.8523537516593933},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7577948570251465},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6494196653366089},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.6174114942550659},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.5567978024482727},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4870603382587433},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.44905921816825867},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4430049955844879},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4306719899177551},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3485832214355469},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14292415976524353},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.13524222373962402},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1309954822063446},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09324052929878235},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D007287","descriptor_name":"Inorganic Chemicals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007287","descriptor_name":"Inorganic Chemicals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D007287","descriptor_name":"Inorganic Chemicals","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":4,"locations":[{"id":"doi:10.1021/acs.jcim.1c01199","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c01199","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.1c01199","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:35199519","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35199519","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:pubmedcentral.nih.gov:9049593","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/9049593","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Chem Inf Model","raw_type":"Text"},{"id":"pmh:oai:purl.org/net/epubs:work/52090135","is_oa":false,"landing_page_url":"http://purl.org/net/epubs/work/52090135","pdf_url":null,"source":{"id":"https://openalex.org/S4306400600","display_name":"ePubs (Science and Technology Facilities Council, Research Councils UK)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162524378","host_organization_name":"Science and Technology Facilities Council","host_organization_lineage":["https://openalex.org/I162524378"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1021/acs.jcim.1c01199","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c01199","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.1c01199","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1531024995","display_name":null,"funder_award_id":"RCSRF1819\\7\\10","funder_id":"https://openalex.org/F4320320005","funder_display_name":"Royal Academy of Engineering"}],"funders":[{"id":"https://openalex.org/F4320307790","display_name":"BASF","ror":"https://ror.org/01q8f6705"},{"id":"https://openalex.org/F4320320005","display_name":"Royal Academy of Engineering","ror":"https://ror.org/0526snb40"},{"id":"https://openalex.org/F4320334632","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4214535912.pdf","grobid_xml":"https://content.openalex.org/works/W4214535912.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1566289585","https://openalex.org/W1632114991","https://openalex.org/W2064675550","https://openalex.org/W2144578941","https://openalex.org/W2149369282","https://openalex.org/W2278830581","https://openalex.org/W2346452181","https://openalex.org/W2523785361","https://openalex.org/W2769387903","https://openalex.org/W2800079696","https://openalex.org/W2801930304","https://openalex.org/W2808304511","https://openalex.org/W2963691697","https://openalex.org/W2964864162","https://openalex.org/W2970771982","https://openalex.org/W2970813009","https://openalex.org/W3200122731"],"related_works":["https://openalex.org/W2032007337","https://openalex.org/W2186562580","https://openalex.org/W1884363728","https://openalex.org/W2983934248","https://openalex.org/W1605730749","https://openalex.org/W2155874911","https://openalex.org/W2916255597","https://openalex.org/W4386977977","https://openalex.org/W4253099099","https://openalex.org/W4200491110"],"abstract_inverted_index":{"Chemical":[0],"Named":[1],"Entity":[2],"Recognition":[3],"(NER)":[4],"forms":[5],"the":[6,13,27,64,82,89,101,109],"basis":[7],"of":[8,24,40,51,96],"information":[9],"extraction":[10],"tasks":[11,19,80],"in":[12,37,43],"chemical":[14],"domain.":[15],"However,":[16],"while":[17],"such":[18,44],"can":[20],"involve":[21],"multiple":[22],"domains":[23],"chemistry":[25],"at":[26,61,81],"same":[28,83],"time,":[29],"currently":[30],"available":[31,93],"named":[32],"entity":[33],"recognizers":[34],"are":[35],"specialized":[36],"one":[38],"part":[39,95],"chemistry,":[41],"resulting":[42],"workflows":[45],"failing":[46],"for":[47,66],"a":[48,56],"biased":[49],"subset":[50],"mentions.":[52],"This":[53],"paper":[54],"presents":[55],"single":[57],"model":[58],"that":[59],"performs":[60],"close":[62],"to":[63,107],"state-of-the-art":[65],"both":[67],"organic":[68],"(CHEMDNER,":[69],"89.7":[70],"F1":[71,77],"score)":[72,78],"and":[73,104],"inorganic":[74],"(Matscholar,":[75],"88.0":[76],"NER":[79,86],"time.":[84],"Our":[85],"system":[87],"utilizing":[88],"Bert":[90],"architecture":[91],"is":[92],"as":[94],"ChemDataExtractor":[97],"2.1,":[98],"along":[99],"with":[100],"data":[102],"sets":[103],"scripts":[105],"used":[106],"train":[108],"model.":[110]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":16},{"year":2022,"cited_by_count":3}],"updated_date":"2026-05-16T08:24:45.110214","created_date":"2025-10-10T00:00:00"}
