{"id":"https://openalex.org/W4392350883","doi":"https://doi.org/10.1088/2632-2153/ad2f52","title":"Autonomous data extraction from peer reviewed literature for training machine learning models of oxidation potentials","display_name":"Autonomous data extraction from peer reviewed literature for training machine learning models of oxidation potentials","publication_year":2024,"publication_date":"2024-03-01","ids":{"openalex":"https://openalex.org/W4392350883","doi":"https://doi.org/10.1088/2632-2153/ad2f52"},"language":"en","primary_location":{"id":"doi:10.1088/2632-2153/ad2f52","is_oa":true,"landing_page_url":"https://doi.org/10.1088/2632-2153/ad2f52","pdf_url":"https://iopscience.iop.org/article/10.1088/2632-2153/ad2f52/pdf","source":{"id":"https://openalex.org/S4210200687","display_name":"Machine Learning Science and Technology","issn_l":"2632-2153","issn":["2632-2153"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320083","host_organization_name":"IOP Publishing","host_organization_lineage":["https://openalex.org/P4310320083","https://openalex.org/P4310311669"],"host_organization_lineage_names":["IOP Publishing","Institute of Physics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning: Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://iopscience.iop.org/article/10.1088/2632-2153/ad2f52/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020686872","display_name":"Siwoo Lee","orcid":"https://orcid.org/0000-0002-4521-389X"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Siwoo Lee","raw_affiliation_strings":["Chemistry, University of Toronto - St George Campus, 80 St George St., Toronto, Ontario, M5S 3H6, CANADA"],"raw_orcid":"https://orcid.org/0000-0002-4521-389X","affiliations":[{"raw_affiliation_string":"Chemistry, University of Toronto - St George Campus, 80 St George St., Toronto, Ontario, M5S 3H6, CANADA","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024088138","display_name":"Stefan Heinen","orcid":"https://orcid.org/0000-0001-9382-2342"},"institutions":[{"id":"https://openalex.org/I4210127509","display_name":"Vector Institute","ror":"https://ror.org/03kqdja62","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210127509"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Stefan Heinen","raw_affiliation_strings":["Vector Institute, 661 University Ave., Toronto, Ontario, M5G 1M1, CANADA"],"raw_orcid":"https://orcid.org/0000-0001-9382-2342","affiliations":[{"raw_affiliation_string":"Vector Institute, 661 University Ave., Toronto, Ontario, M5G 1M1, CANADA","institution_ids":["https://openalex.org/I4210127509"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074882900","display_name":"Danish Khan","orcid":"https://orcid.org/0000-0001-7529-2202"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Danish Khan","raw_affiliation_strings":["Chemistry, University of Toronto - St George Campus, 80 St George St., Toronto, Ontario, M5S 3H6, CANADA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chemistry, University of Toronto - St George Campus, 80 St George St., Toronto, Ontario, M5S 3H6, CANADA","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088793872","display_name":"O. Anatole von Lilienfeld","orcid":"https://orcid.org/0000-0001-7419-0466"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"O Anatole von Lilienfeld","raw_affiliation_strings":["Departments of Chemistry, Materials Science and Engineering, and Physics, University of Toronto - St George Campus, 80 St George St., Toronto, Ontario, M5S 3H6, CANADA"],"raw_orcid":"https://orcid.org/0000-0001-7419-0466","affiliations":[{"raw_affiliation_string":"Departments of Chemistry, Materials Science and Engineering, and Physics, University of Toronto - St George Campus, 80 St George St., Toronto, Ontario, M5S 3H6, CANADA","institution_ids":["https://openalex.org/I185261750"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5088793872"],"corresponding_institution_ids":["https://openalex.org/I185261750"],"apc_list":{"value":1600,"currency":"GBP","value_usd":1962},"apc_paid":{"value":1600,"currency":"GBP","value_usd":1962},"fwci":1.0446,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.77207897,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"5","issue":"1","first_page":"015052","last_page":"015052"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.939300000667572,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5817890167236328},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5680308938026428},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.49994516372680664},{"id":"https://openalex.org/keywords/peer-review","display_name":"Peer review","score":0.4800173342227936},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4791104793548584},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43406766653060913},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4313974976539612},{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.1687150001525879},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.12787491083145142},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06754732131958008}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5817890167236328},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5680308938026428},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.49994516372680664},{"id":"https://openalex.org/C138368954","wikidata":"https://www.wikidata.org/wiki/Q215028","display_name":"Peer review","level":2,"score":0.4800173342227936},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4791104793548584},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43406766653060913},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4313974976539612},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.1687150001525879},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.12787491083145142},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06754732131958008},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1088/2632-2153/ad2f52","is_oa":true,"landing_page_url":"https://doi.org/10.1088/2632-2153/ad2f52","pdf_url":"https://iopscience.iop.org/article/10.1088/2632-2153/ad2f52/pdf","source":{"id":"https://openalex.org/S4210200687","display_name":"Machine Learning Science and Technology","issn_l":"2632-2153","issn":["2632-2153"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320083","host_organization_name":"IOP Publishing","host_organization_lineage":["https://openalex.org/P4310320083","https://openalex.org/P4310311669"],"host_organization_lineage_names":["IOP Publishing","Institute of Physics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning: Science and Technology","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:95be18147f884c3e90f822b66fe4feae","is_oa":true,"landing_page_url":"https://doaj.org/article/95be18147f884c3e90f822b66fe4feae","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning: Science and Technology, Vol 5, Iss 1, p 015052 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1088/2632-2153/ad2f52","is_oa":true,"landing_page_url":"https://doi.org/10.1088/2632-2153/ad2f52","pdf_url":"https://iopscience.iop.org/article/10.1088/2632-2153/ad2f52/pdf","source":{"id":"https://openalex.org/S4210200687","display_name":"Machine Learning Science and Technology","issn_l":"2632-2153","issn":["2632-2153"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320083","host_organization_name":"IOP Publishing","host_organization_lineage":["https://openalex.org/P4310320083","https://openalex.org/P4310311669"],"host_organization_lineage_names":["IOP Publishing","Institute of Physics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning: Science and Technology","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.6899999976158142}],"awards":[{"id":"https://openalex.org/G7010704880","display_name":"Quantum Machine Learning: Chemical Reactions with Unprecedented Speed and Accuracy","funder_award_id":"772834","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320322015","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087"},{"id":"https://openalex.org/F4320326644","display_name":"Canada First Research Excellence Fund","ror":null}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392350883.pdf"},"referenced_works_count":65,"referenced_works":["https://openalex.org/W1901616594","https://openalex.org/W1975147762","https://openalex.org/W1975997599","https://openalex.org/W1981745234","https://openalex.org/W1983755491","https://openalex.org/W2000840224","https://openalex.org/W2019842470","https://openalex.org/W2023271753","https://openalex.org/W2026467865","https://openalex.org/W2029413789","https://openalex.org/W2057954853","https://openalex.org/W2067396899","https://openalex.org/W2080635178","https://openalex.org/W2087556827","https://openalex.org/W2092057169","https://openalex.org/W2114704115","https://openalex.org/W2119255695","https://openalex.org/W2125243015","https://openalex.org/W2143981217","https://openalex.org/W2147256331","https://openalex.org/W2313699126","https://openalex.org/W2323576155","https://openalex.org/W2523785361","https://openalex.org/W2592364803","https://openalex.org/W2604874722","https://openalex.org/W2605801743","https://openalex.org/W2766085136","https://openalex.org/W2791355014","https://openalex.org/W2795387844","https://openalex.org/W2953354214","https://openalex.org/W2976720228","https://openalex.org/W2998962218","https://openalex.org/W3000478925","https://openalex.org/W3036389167","https://openalex.org/W3043309207","https://openalex.org/W3044640769","https://openalex.org/W3045882047","https://openalex.org/W3047471717","https://openalex.org/W3048565185","https://openalex.org/W3085090411","https://openalex.org/W3093819802","https://openalex.org/W3115611223","https://openalex.org/W3120715532","https://openalex.org/W3120951562","https://openalex.org/W3159789740","https://openalex.org/W3167404434","https://openalex.org/W3185227028","https://openalex.org/W3198449425","https://openalex.org/W3202771869","https://openalex.org/W3203912530","https://openalex.org/W3203974877","https://openalex.org/W4200219459","https://openalex.org/W4225496776","https://openalex.org/W4226302501","https://openalex.org/W4251948262","https://openalex.org/W4281619372","https://openalex.org/W4283267143","https://openalex.org/W4284882660","https://openalex.org/W4309563570","https://openalex.org/W4313567241","https://openalex.org/W4318069287","https://openalex.org/W4318486595","https://openalex.org/W4318952054","https://openalex.org/W4320727261","https://openalex.org/W6785794181"],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W258997015","https://openalex.org/W2997094352","https://openalex.org/W3216976533","https://openalex.org/W100620283","https://openalex.org/W2495260952","https://openalex.org/W4366179611","https://openalex.org/W2996078371"],"abstract_inverted_index":{"Abstract":[0],"We":[1],"present":[2],"an":[3,156,168],"automated":[4],"data-collection":[5],"pipeline":[6,26,179],"involving":[7],"a":[8,13],"convolutional":[9],"neural":[10],"network":[11],"and":[12,35,55,197],"large":[14],"language":[15],"model":[16],"to":[17,29,46,71,123,164,200],"extract":[18],"user-specified":[19],"tabular":[20],"data":[21,50,57,117,129,192],"from":[22,112,161],"peer-reviewed":[23],"literature.":[24],"The":[25,178],"is":[27],"applied":[28],"74":[30],"reports":[31],"published":[32],"between":[33],"1957":[34],"2014":[36],"with":[37,121,131],"experimentally-measured":[38],"oxidation":[39,105,153],"potentials":[40,106],"for":[41,118,189],"592":[42],"organic":[43,110,119,157],"molecules":[44,81,111,120],"(\u22120.75":[45],"3.58":[47],"V).":[48,75],"After":[49],"curation":[51],"(solvents,":[52],"reference":[53],"electrodes,":[54],"missed":[56],"points),":[58],"we":[59,86,102],"trained":[60,99],"multiple":[61,84],"supervised":[62],"machine":[63],"learning":[64],"(ML)":[65],"models":[66],"reaching":[67],"prediction":[68],"errors":[69],"similar":[70],"experimental":[72,77,195],"uncertainty":[73],"(\u223c0.2":[74],"For":[76],"measurements":[78],"of":[79,107,138,144,155,172,194],"identical":[80],"reported":[82],"in":[83,142,170,184],"studies,":[85],"identified":[87],"the":[88,98,113,139,152],"most":[89],"likely":[90],"value":[91],"based":[92],"on":[93,159],"out-of-sample":[94],"ML":[95,100],"predictions.":[96],"Using":[97],"models,":[101],"then":[103],"estimated":[104],"\u223c132k":[108],"small":[109],"QM9":[114,140],"(quantum":[115],"mechanics":[116],"up":[122],"9":[124],"atoms":[125,174],"not":[126],"counting":[127],"hydrogens)":[128],"set,":[130],"predicted":[132],"values":[133],"spanning":[134],"0.21\u20133.46":[135],"V.":[136],"Analysis":[137],"predictions":[141],"terms":[143],"plausible":[145],"descriptor-property":[146],"trends":[147],"suggests":[148],"that":[149],"aliphaticity":[150],"increases":[151],"potential":[154],"molecule":[158],"average":[160],"\u223c1.5":[162],"V":[163],"\u223c2":[165],"V,":[166],"while":[167],"increase":[169],"number":[171],"heavy":[173],"lowers":[175],"it":[176],"systematically.":[177],"introduced":[180],"offers":[181],"significant":[182],"reductions":[183],"human":[185],"labor":[186],"otherwise":[187],"required":[188],"conventional":[190],"manual":[191],"collection":[193],"results,":[196],"exemplifies":[198],"how":[199],"accelerate":[201],"scientific":[202],"research":[203],"through":[204],"automation.":[205]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
