{"id":"https://openalex.org/W3201277766","doi":"https://doi.org/10.1021/acs.jcim.1c01017","title":"ReactionDataExtractor: A Tool for Automated Extraction of Information from Chemical Reaction Schemes","display_name":"ReactionDataExtractor: A Tool for Automated Extraction of Information from Chemical Reaction Schemes","publication_year":2021,"publication_date":"2021-09-15","ids":{"openalex":"https://openalex.org/W3201277766","doi":"https://doi.org/10.1021/acs.jcim.1c01017","mag":"3201277766","pmid":"https://pubmed.ncbi.nlm.nih.gov/34525303"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.1c01017","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c01017","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026250520","display_name":"Damian M. Wilary","orcid":null},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Damian M. Wilary","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068607578","display_name":"Jacqueline M. Cole","orcid":"https://orcid.org/0000-0002-1552-8743"},"institutions":[{"id":"https://openalex.org/I1286704778","display_name":"Rutherford Appleton Laboratory","ror":"https://ror.org/03gq8fr08","country_code":"GB","type":"facility","lineage":["https://openalex.org/I1286704778","https://openalex.org/I162524378","https://openalex.org/I4210087105"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jacqueline M. Cole","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0AS, U.K","ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K"],"raw_orcid":"https://orcid.org/0000-0002-1552-8743","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0AS, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K","institution_ids":["https://openalex.org/I1286704778"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5068607578"],"corresponding_institution_ids":["https://openalex.org/I1286704778","https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":1.6016,"has_fulltext":false,"cited_by_count":28,"citation_normalized_percentile":{"value":0.8270112,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"61","issue":"10","first_page":"4962","last_page":"4974"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11407","display_name":"Innovative Microfluidic and Catalytic Techniques Innovation","score":0.9722999930381775,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7828940153121948},{"id":"https://openalex.org/keywords/bespoke","display_name":"Bespoke","score":0.6723955869674683},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5202375650405884},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4744490385055542},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44717100262641907},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4455625116825104},{"id":"https://openalex.org/keywords/chemical-database","display_name":"Chemical database","score":0.41493356227874756},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3467344045639038},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.1409040093421936}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7828940153121948},{"id":"https://openalex.org/C44210515","wikidata":"https://www.wikidata.org/wiki/Q16968978","display_name":"Bespoke","level":2,"score":0.6723955869674683},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5202375650405884},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4744490385055542},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44717100262641907},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4455625116825104},{"id":"https://openalex.org/C203394866","wikidata":"https://www.wikidata.org/wiki/Q2881060","display_name":"Chemical database","level":2,"score":0.41493356227874756},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3467344045639038},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.1409040093421936},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000069558","descriptor_name":"Unsupervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069558","descriptor_name":"Unsupervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000069558","descriptor_name":"Unsupervised Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057225","descriptor_name":"Data Mining","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.1021/acs.jcim.1c01017","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c01017","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:34525303","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34525303","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:purl.org/net/epubs:work/50600477","is_oa":false,"landing_page_url":"http://purl.org/net/epubs/work/50600477","pdf_url":null,"source":{"id":"https://openalex.org/S4306400600","display_name":"ePubs (Science and Technology Facilities Council, Research Councils UK)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162524378","host_organization_name":"Science and Technology Facilities Council","host_organization_lineage":["https://openalex.org/I162524378"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Life in Land","id":"https://metadata.un.org/sdg/15","score":0.5699999928474426}],"awards":[{"id":"https://openalex.org/G1531024995","display_name":null,"funder_award_id":"RCSRF1819\\7\\10","funder_id":"https://openalex.org/F4320320005","funder_display_name":"Royal Academy of Engineering"}],"funders":[{"id":"https://openalex.org/F4320307790","display_name":"BASF","ror":"https://ror.org/01q8f6705"},{"id":"https://openalex.org/F4320320005","display_name":"Royal Academy of Engineering","ror":"https://ror.org/0526snb40"},{"id":"https://openalex.org/F4320334632","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1966456689","https://openalex.org/W1975147762","https://openalex.org/W1992985800","https://openalex.org/W2001642682","https://openalex.org/W2021662118","https://openalex.org/W2051810242","https://openalex.org/W2061890234","https://openalex.org/W2088221622","https://openalex.org/W2095905764","https://openalex.org/W2134164499","https://openalex.org/W2160592517","https://openalex.org/W2417328420","https://openalex.org/W2523785361","https://openalex.org/W2600616842","https://openalex.org/W2761456708","https://openalex.org/W2808304511","https://openalex.org/W2904992191","https://openalex.org/W2913999598","https://openalex.org/W2952832141","https://openalex.org/W2963734039","https://openalex.org/W2964332384","https://openalex.org/W2992302948","https://openalex.org/W2998371649","https://openalex.org/W3013318641","https://openalex.org/W3047398431","https://openalex.org/W3097598035","https://openalex.org/W4233344962"],"related_works":["https://openalex.org/W2181465263","https://openalex.org/W3093969907","https://openalex.org/W3202725889","https://openalex.org/W2232750048","https://openalex.org/W2212726445","https://openalex.org/W4244794230","https://openalex.org/W4389712339","https://openalex.org/W2806540758","https://openalex.org/W2494533090","https://openalex.org/W3082526343"],"abstract_inverted_index":{"Chemical":[0],"reaction":[1,14,26,74,81,85],"schemes":[2,15],"are":[3],"commonly":[4],"used":[5,65,123],"for":[6,66,128],"visual":[7],"encapsulation":[8],"of":[9,13,32,45,70,80,99,132,138,158,168],"chemical":[10,17,20,87],"information.":[11],"Figures":[12],"contain":[16],"transformations,":[18],"the":[19,43,67,142,151,164,177],"species":[21],"involved,":[22],"as":[23,25,89,91,124],"well":[24,90],"conditions.":[27],"From":[28],"a":[29,59,125,147],"data-mining":[30],"point":[31],"view,":[33],"they":[34],"constitute":[35],"rich":[36],"sources,":[37],"densely":[38],"packed":[39],"with":[40,107],"knowledge.":[41],"Yet,":[42],"challenge":[44],"automatically":[46],"extracting":[47,133],"data":[48,134,169],"from":[49,72,135,185],"them":[50],"has":[51],"remained":[52],"largely":[53],"untackled.":[54],"This":[55],"work":[56],"presents":[57],"ReactionDataExtractor,":[58],"software":[60],"tool":[61,127,152,173],"that":[62,111],"can":[63,121],"be":[64,122],"automatic":[68],"extraction":[69],"information":[71],"multistep":[73],"schemes.":[75],"Its":[76],"capabilities":[77],"include":[78],"segmentation":[79],"steps,":[82],"regions":[83],"containing":[84],"conditions,":[86],"diagrams,":[88],"optical":[92],"character":[93],"and":[94,101,116,155,161,180],"structure":[95],"recognition.":[96],"A":[97],"combination":[98],"rules":[100],"unsupervised":[102],"machine-learning":[103],"approaches":[104],"is":[105,174,181],"used,":[106],"bespoke":[108],"detection":[109,118],"algorithms":[110],"identify":[112],"arrows,":[113],"structures,":[114],"labels,":[115],"conditions":[117],"algorithms.":[119],"It":[120],"low-maintenance":[126],"database":[129],"generation":[130],"capable":[131],"large":[136],"quantities":[137],"images":[139],"supplied":[140],"by":[141],"user.":[143],"On":[144],"assessment":[145],"using":[146],"self-generated":[148],"evaluation":[149],"set,":[150],"achieved":[153],"precision":[154],"recall":[156],"metrics":[157],"between":[159],"67%":[160],"91%":[162],"in":[163],"six":[165],"core":[166],"areas":[167],"extraction.":[170],"The":[171],"ReactionDataExtractor":[172],"released":[175],"under":[176],"MIT":[178],"license":[179],"available":[182],"to":[183],"download":[184],"http://www.reactiondataextractor.org.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
