{"id":"https://openalex.org/W3200122731","doi":"https://doi.org/10.1021/acs.jcim.1c00446","title":"ChemDataExtractor 2.0: Autopopulated Ontologies for Materials Science","display_name":"ChemDataExtractor 2.0: Autopopulated Ontologies for Materials Science","publication_year":2021,"publication_date":"2021-09-16","ids":{"openalex":"https://openalex.org/W3200122731","doi":"https://doi.org/10.1021/acs.jcim.1c00446","mag":"3200122731","pmid":"https://pubmed.ncbi.nlm.nih.gov/34529432"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.1c00446","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c00446","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054221959","display_name":"Juraj Mavra\u010di\u0107","orcid":"https://orcid.org/0000-0001-5457-4550"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Juraj Mavra\u010di\u0107","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, U.K"],"raw_orcid":"https://orcid.org/0000-0001-5457-4550","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091047927","display_name":"Callum J. Court","orcid":"https://orcid.org/0000-0002-3919-5605"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Callum J. Court","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K"],"raw_orcid":"https://orcid.org/0000-0002-3919-5605","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084387381","display_name":"Taketomo Isazawa","orcid":"https://orcid.org/0000-0003-0475-403X"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Taketomo Isazawa","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001814682","display_name":"Stephen R. Elliott","orcid":"https://orcid.org/0000-0002-8202-8482"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stephen R. Elliott","raw_affiliation_strings":["Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, U.K"],"raw_orcid":"https://orcid.org/0000-0002-8202-8482","affiliations":[{"raw_affiliation_string":"Department of Chemistry, University of Cambridge, Lensfield Road, Cambridge CB2 1EW, U.K","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068607578","display_name":"Jacqueline M. Cole","orcid":"https://orcid.org/0000-0002-1552-8743"},"institutions":[{"id":"https://openalex.org/I1286704778","display_name":"Rutherford Appleton Laboratory","ror":"https://ror.org/03gq8fr08","country_code":"GB","type":"facility","lineage":["https://openalex.org/I1286704778","https://openalex.org/I162524378","https://openalex.org/I4210087105"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Jacqueline M. Cole","raw_affiliation_strings":["Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0FS, U.K","ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K"],"raw_orcid":"https://orcid.org/0000-0002-1552-8743","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"Department of Chemical Engineering and Biotechnology, University of Cambridge, West Cambridge Site, Philippa Fawcett Drive, Cambridge CB3 0FS, U.K","institution_ids":["https://openalex.org/I241749"]},{"raw_affiliation_string":"ISIS Neutron and Muon Source, STFC Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K","institution_ids":["https://openalex.org/I1286704778"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068607578"],"corresponding_institution_ids":["https://openalex.org/I1286704778","https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":6.006,"has_fulltext":false,"cited_by_count":136,"citation_normalized_percentile":{"value":0.97578238,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"61","issue":"9","first_page":"4280","last_page":"4289"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9434000253677368,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.819645881652832},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.6234402656555176},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5120139718055725},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.49835872650146484},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4651196002960205},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4635203778743744},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.44081008434295654},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.43806904554367065},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.427801251411438},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.4208987355232239},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.420095831155777},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.41831839084625244},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1973128616809845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.819645881652832},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.6234402656555176},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5120139718055725},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49835872650146484},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4651196002960205},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4635203778743744},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.44081008434295654},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.43806904554367065},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.427801251411438},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.4208987355232239},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.420095831155777},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.41831839084625244},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1973128616809845},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D000074266","descriptor_name":"Materials Science","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000074266","descriptor_name":"Materials Science","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000074266","descriptor_name":"Materials Science","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":3,"locations":[{"id":"doi:10.1021/acs.jcim.1c00446","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.1c00446","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:34529432","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34529432","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:purl.org/net/epubs:work/50460000","is_oa":false,"landing_page_url":"http://purl.org/net/epubs/work/50460000","pdf_url":null,"source":{"id":"https://openalex.org/S4306400600","display_name":"ePubs (Science and Technology Facilities Council, Research Councils UK)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I162524378","host_organization_name":"Science and Technology Facilities Council","host_organization_lineage":["https://openalex.org/I162524378"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"id":"https://metadata.un.org/sdg/14","display_name":"Life below water"}],"awards":[{"id":"https://openalex.org/G1531024995","display_name":null,"funder_award_id":"RCSRF1819\\7\\10","funder_id":"https://openalex.org/F4320320005","funder_display_name":"Royal Academy of Engineering"},{"id":"https://openalex.org/G4069894743","display_name":"EPSRC Centre for Doctoral Training in Computational Methods for Materials Science","funder_award_id":"EP/L015552/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320307790","display_name":"BASF","ror":"https://ror.org/01q8f6705"},{"id":"https://openalex.org/F4320320005","display_name":"Royal Academy of Engineering","ror":"https://ror.org/0526snb40"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334632","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1541579345","https://openalex.org/W1992985800","https://openalex.org/W2052198547","https://openalex.org/W2103931177","https://openalex.org/W2121076213","https://openalex.org/W2136794542","https://openalex.org/W2152152303","https://openalex.org/W2161344493","https://openalex.org/W2170214641","https://openalex.org/W2274288262","https://openalex.org/W2278970271","https://openalex.org/W2319902168","https://openalex.org/W2464725281","https://openalex.org/W2479958290","https://openalex.org/W2523785361","https://openalex.org/W2609982526","https://openalex.org/W2610394652","https://openalex.org/W2766362701","https://openalex.org/W2767800135","https://openalex.org/W2796365074","https://openalex.org/W2800722845","https://openalex.org/W2806495905","https://openalex.org/W2808304511","https://openalex.org/W2883528235","https://openalex.org/W2884430236","https://openalex.org/W2893586212","https://openalex.org/W2904992191","https://openalex.org/W2949015218","https://openalex.org/W2953053221","https://openalex.org/W2953641512","https://openalex.org/W2956357421","https://openalex.org/W2963899413","https://openalex.org/W2964864162","https://openalex.org/W2968923792","https://openalex.org/W2982830820","https://openalex.org/W2992302948","https://openalex.org/W3006937107","https://openalex.org/W3008287297","https://openalex.org/W3013318641","https://openalex.org/W3100710928","https://openalex.org/W3103940211"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W2016355461","https://openalex.org/W2386938185","https://openalex.org/W2068844411","https://openalex.org/W1986386500","https://openalex.org/W2793744252","https://openalex.org/W1516746680","https://openalex.org/W2072284316"],"abstract_inverted_index":{"The":[0],"ever-growing":[1],"abundance":[2],"of":[3,17,40,51,63,70,74,133,140,144,151,159,166,187],"data":[4,21,98,111,123],"found":[5],"in":[6,24,27],"heterogeneous":[7],"sources,":[8,84],"such":[9,85],"as":[10,86,112],"scientific":[11,160,194],"publications,":[12],"has":[13,34,44],"forced":[14],"the":[15,25,28,32,37,49,67,106,131,137,184],"development":[16],"automated":[18,61],"techniques":[19],"for":[20,59,100,122],"extraction.":[22],"While":[23],"past,":[26],"physical":[29,103],"sciences":[30],"domain,":[31],"focus":[33],"been":[35,46],"on":[36],"precise":[38],"extraction":[39,50,69,99,124,139],"individual":[41],"properties,":[42],"attention":[43],"recently":[45],"devoted":[47],"to":[48,108],"higher-level":[52],"relationships.":[53],"Here,":[54],"we":[55,129],"present":[56,91],"a":[57,71,78,92,141,180,192],"framework":[58],"an":[60,156,163],"population":[62],"ontologies.":[64],"That":[65],"is,":[66],"direct":[68],"larger":[72],"group":[73],"properties":[75,104],"linked":[76],"by":[77],"semantic":[79],"network.":[80],"We":[81],"exploit":[82],"data-rich":[83],"tables":[87],"within":[88],"documents,":[89],"and":[90,102,125,174],"new":[93],"model":[94],"concept":[95],"that":[96],"enables":[97],"chemical":[101],"with":[105,118],"ability":[107],"organize":[109],"hierarchical":[110],"nested":[113,152],"information.":[114,145],"Combining":[115],"these":[116],"capabilities":[117],"automatically":[119],"generated":[120],"parsers":[121],"forward-looking":[126],"interdependency":[127],"resolution,":[128],"illustrate":[130],"power":[132],"our":[134],"approach":[135],"via":[136],"automatic":[138],"crystallographic":[142],"hierarchy":[143],"This":[146],"includes":[147],"18":[148],"interrelated":[149],"submodels":[150],"data,":[153],"extracted":[154],"from":[155],"evaluation":[157],"set":[158],"articles,":[161],"yielding":[162],"overall":[164],"precision":[165],"92.2%,":[167],"across":[168],"26":[169],"different":[170],"journals.":[171],"Our":[172],"method":[173],"associated":[175],"toolkit,":[176],"ChemDataExtractor":[177],"2.0,":[178],"offers":[179],"key":[181],"step":[182],"toward":[183],"seamless":[185],"integration":[186],"primary":[188],"literature":[189],"sources":[190],"into":[191],"data-driven":[193],"framework.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":52},{"year":2024,"cited_by_count":26},{"year":2023,"cited_by_count":32},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
