{"id":"https://openalex.org/W3168661259","doi":"https://doi.org/10.1021/acs.jcim.1c00284","title":"Automated Chemical Reaction Extraction from Scientific Literature","display_name":"Automated Chemical Reaction Extraction from Scientific Literature","publication_year":2021,"publication_date":"2021-06-11","ids":{"openalex":"https://openalex.org/W3168661259","doi":"https://doi.org/10.1021/acs.jcim.1c00284","mag":"3168661259","pmid":"https://pubmed.ncbi.nlm.nih.gov/34115937"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.1c00284","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c00284","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"review","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101455522","display_name":"Jiang Guo","orcid":"https://orcid.org/0000-0002-9816-805X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang Guo","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035532915","display_name":"A. Santiago Ibanez-Lopez","orcid":"https://orcid.org/0000-0003-4199-7911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. Santiago Ibanez-Lopez","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060799967","display_name":"Hanyu Gao","orcid":"https://orcid.org/0000-0002-6346-0739"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hanyu Gao","raw_affiliation_strings":["Department of Chemical Engineering, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070398763","display_name":"Victor Quach","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Victor Quach","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076162644","display_name":"Connor W. Coley","orcid":"https://orcid.org/0000-0002-8271-8723"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Connor W. Coley","raw_affiliation_strings":["Department of Chemical Engineering, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071010920","display_name":"Klavs F. Jensen","orcid":"https://orcid.org/0000-0001-7192-580X"},"institutions":[{"id":"https://openalex.org/I4210110987","display_name":"IIT@MIT","ror":"https://ror.org/01wp8zh54","country_code":"US","type":"facility","lineage":["https://openalex.org/I30771326","https://openalex.org/I4210110987"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Klavs F. Jensen","raw_affiliation_strings":["Department of Chemical Engineering, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Department of Chemical Engineering, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":["https://openalex.org/I4210110987"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010124873","display_name":"Regina Barzilay","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Regina Barzilay","raw_affiliation_strings":["Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States"],"affiliations":[{"raw_affiliation_string":"Computer Science and Artificial Intelligence Laboratory, MIT, Cambridge, Massachusetts 02139, United States","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5010124873"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.6604,"has_fulltext":false,"cited_by_count":101,"citation_normalized_percentile":{"value":0.97298628,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"62","issue":"9","first_page":"2035","last_page":"2045"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7461561560630798},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.4481312036514282},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.4250315725803375},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39352136850357056},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38385826349258423},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.327064573764801}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7461561560630798},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.4481312036514282},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.4250315725803375},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39352136850357056},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38385826349258423},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.327064573764801},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.1c00284","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c00284","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:34115937","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34115937","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.49000000953674316,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W29374554","https://openalex.org/W1566289585","https://openalex.org/W1623072288","https://openalex.org/W1927487865","https://openalex.org/W1966456689","https://openalex.org/W2046844591","https://openalex.org/W2057954853","https://openalex.org/W2089468765","https://openalex.org/W2090433338","https://openalex.org/W2111044246","https://openalex.org/W2142384583","https://openalex.org/W2147880316","https://openalex.org/W2165671627","https://openalex.org/W2270423142","https://openalex.org/W2329266697","https://openalex.org/W2493916176","https://openalex.org/W2523785361","https://openalex.org/W2525778437","https://openalex.org/W2606363443","https://openalex.org/W2626778328","https://openalex.org/W2747592475","https://openalex.org/W2753921001","https://openalex.org/W2769423117","https://openalex.org/W2799620402","https://openalex.org/W2901942917","https://openalex.org/W2903262661","https://openalex.org/W2911489562","https://openalex.org/W2949239628","https://openalex.org/W2951115492","https://openalex.org/W2962902328","https://openalex.org/W2963341956","https://openalex.org/W2963734039","https://openalex.org/W3007750971","https://openalex.org/W3043647281","https://openalex.org/W3091684735","https://openalex.org/W3094332970","https://openalex.org/W3101155908","https://openalex.org/W3103383152","https://openalex.org/W6600424091","https://openalex.org/W6601211009"],"related_works":["https://openalex.org/W2045155990","https://openalex.org/W4313163053","https://openalex.org/W4300973204","https://openalex.org/W3045811229","https://openalex.org/W1483408780","https://openalex.org/W4284884309","https://openalex.org/W2908749798","https://openalex.org/W4243842598","https://openalex.org/W2105980483","https://openalex.org/W1514435881"],"abstract_inverted_index":{"Access":[0],"to":[1,99,191],"structured":[2],"chemical":[3,56,74,121],"reaction":[4,25,114,122,134],"data":[5,196],"is":[6,44],"of":[7,55,84,109,132,163,202,215],"key":[8],"importance":[9],"for":[10,70],"chemists":[11],"in":[12,17,58,96,106,204,209],"performing":[13],"bench":[14],"experiments":[15],"and":[16,41,47,90,129,141,153,166,183,195,207],"modern":[18],"applications":[19],"like":[20],"computer-aided":[21],"drug":[22],"design.":[23],"Existing":[24],"databases":[26],"are":[27,87,103,172,178,189],"generally":[28],"populated":[29],"by":[30],"human":[31],"curators":[32],"through":[33],"manual":[34],"abstraction":[35],"from":[36,73],"published":[37],"literature":[38,57],"(e.g.,":[39],"patents":[40],"journals),":[42],"which":[43,86,177],"time":[45],"consuming":[46],"labor":[48],"intensive,":[49],"especially":[50],"with":[51,156,212],"the":[52,81,93,113,146],"exponential":[53],"growth":[54],"recent":[59],"years.":[60],"In":[61],"this":[62],"study,":[63],"we":[64,117],"focus":[65],"on":[66],"developing":[67],"automated":[68],"methods":[69],"extracting":[71],"reactions":[72],"literature.":[75],"We":[76,144],"consider":[77],"journal":[78],"publications":[79],"as":[80,137,148],"target":[82],"source":[83],"information,":[85],"more":[88],"comprehensive":[89],"better":[91],"represent":[92],"latest":[94],"developments":[95],"chemistry":[97],"compared":[98],"patents;":[100],"however,":[101],"they":[102],"less":[104],"formulaic":[105],"their":[107],"descriptions":[108],"reactions.":[110,217],"To":[111],"implement":[112],"extraction":[115,206],"system,":[116],"first":[118],"devised":[119],"a":[120,126,130,149,157],"schema,":[123],"primarily":[124],"including":[125],"central":[127],"<i>product</i>,":[128],"set":[131],"associated":[133],"roles":[135],"such":[136],"<i>reactants</i>,":[138],"<i>catalyst</i>,":[139],"<i>solvent</i>,":[140],"so":[142],"on.":[143],"formulate":[145],"task":[147],"structure":[150],"prediction":[151],"problem":[152],"solve":[154],"it":[155],"two-stage":[158],"deep":[159],"learning":[160],"framework":[161],"consisting":[162],"<i>product":[164],"extraction</i>":[165],"<i>reaction":[167],"role":[168,210],"labeling</i>.":[169],"Both":[170],"models":[171,188],"built":[173],"upon":[174],"Transformer-based":[175],"encoders,":[176],"adaptively":[179],"pretrained":[180],"using":[181],"domain":[182],"task-relevant":[184],"unlabeled":[185],"data.":[186],"Our":[187],"shown":[190],"be":[192],"both":[193],"effective":[194],"efficient,":[197],"achieving":[198],"an":[199],"F1":[200],"score":[201],"76.2%":[203],"product":[205],"78.7%":[208],"extraction,":[211],"only":[213],"hundreds":[214],"annotated":[216]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":35},{"year":2023,"cited_by_count":27},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
