{"id":"https://openalex.org/W3175270449","doi":"https://doi.org/10.1186/s13321-021-00554-8","title":"Scalable estimator of the diversity for de novo molecular generation resulting in a more robust QM dataset (OD9) and a more efficient molecular optimization","display_name":"Scalable estimator of the diversity for de novo molecular generation resulting in a more robust QM dataset (OD9) and a more efficient molecular optimization","publication_year":2021,"publication_date":"2021-10-02","ids":{"openalex":"https://openalex.org/W3175270449","doi":"https://doi.org/10.1186/s13321-021-00554-8","mag":"3175270449","pmid":"https://pubmed.ncbi.nlm.nih.gov/34600576"},"language":"en","primary_location":{"id":"doi:10.1186/s13321-021-00554-8","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-021-00554-8","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00554-8","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00554-8","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018371449","display_name":"Jules Leguy","orcid":"https://orcid.org/0000-0002-6808-7806"},"institutions":[{"id":"https://openalex.org/I49451733","display_name":"Universit\u00e9 d'Angers","ror":"https://ror.org/04yrqp957","country_code":"FR","type":"education","lineage":["https://openalex.org/I49451733"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Jules Leguy","raw_affiliation_strings":["Univ Angers, LERIA, SFR MATHSTIC, 49000, Angers, France","LERIA - Laboratoire d'Etudes et de Recherche en Informatique d'Angers (Universit\u00e9 d'Angers, LERIA - Facult\u00e9s des Sciences, 2 Boulevard Lavoisier, B\u00e2timents G et H - 49045 Angers CEDEX 01 - France)"],"raw_orcid":"https://orcid.org/0000-0002-6808-7806","affiliations":[{"raw_affiliation_string":"Univ Angers, LERIA, SFR MATHSTIC, 49000, Angers, France","institution_ids":["https://openalex.org/I49451733"]},{"raw_affiliation_string":"LERIA - Laboratoire d'Etudes et de Recherche en Informatique d'Angers (Universit\u00e9 d'Angers, LERIA - Facult\u00e9s des Sciences, 2 Boulevard Lavoisier, B\u00e2timents G et H - 49045 Angers CEDEX 01 - France)","institution_ids":["https://openalex.org/I49451733"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111263059","display_name":"Marta Glavatskikh","orcid":null},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210137122","display_name":"Institut des Sciences et Technologies Mol\u00e9culaires d'Angers","ror":"https://ror.org/039yyx623","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I4210128300","https://openalex.org/I4210137122","https://openalex.org/I49451733"]},{"id":"https://openalex.org/I49451733","display_name":"Universit\u00e9 d'Angers","ror":"https://ror.org/04yrqp957","country_code":"FR","type":"education","lineage":["https://openalex.org/I49451733"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Marta Glavatskikh","raw_affiliation_strings":["Univ Angers, CNRS, MOLTECH-ANJOU, SFR MATRIX, 49000, Angers, France","Univ Angers, LERIA, SFR MATHSTIC, 49000, Angers, France","LERIA - Laboratoire d'Etudes et de Recherche en Informatique d'Angers (Universit\u00e9 d'Angers, LERIA - Facult\u00e9s des Sciences, 2 Boulevard Lavoisier, B\u00e2timents G et H - 49045 Angers CEDEX 01 - France)"],"raw_orcid":"https://orcid.org/0000-0002-4691-0240","affiliations":[{"raw_affiliation_string":"Univ Angers, CNRS, MOLTECH-ANJOU, SFR MATRIX, 49000, Angers, France","institution_ids":["https://openalex.org/I4210137122","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Univ Angers, LERIA, SFR MATHSTIC, 49000, Angers, France","institution_ids":["https://openalex.org/I49451733"]},{"raw_affiliation_string":"LERIA - Laboratoire d'Etudes et de Recherche en Informatique d'Angers (Universit\u00e9 d'Angers, LERIA - Facult\u00e9s des Sciences, 2 Boulevard Lavoisier, B\u00e2timents G et H - 49045 Angers CEDEX 01 - France)","institution_ids":["https://openalex.org/I49451733"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027991232","display_name":"Thomas Cauchy","orcid":"https://orcid.org/0000-0003-4259-3257"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210137122","display_name":"Institut des Sciences et Technologies Mol\u00e9culaires d'Angers","ror":"https://ror.org/039yyx623","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I4210128300","https://openalex.org/I4210137122","https://openalex.org/I49451733"]},{"id":"https://openalex.org/I49451733","display_name":"Universit\u00e9 d'Angers","ror":"https://ror.org/04yrqp957","country_code":"FR","type":"education","lineage":["https://openalex.org/I49451733"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Thomas Cauchy","raw_affiliation_strings":["Univ Angers, CNRS, MOLTECH-ANJOU, SFR MATRIX, 49000, Angers, France. thomas.cauchy@univ-angers.fr","MOLTECH-Anjou (MOLTECH-Anjou UMR 6200 CNRS - Universit\u00e9 d'Angers, Facult\u00e9 des Sciences, 2 Boulevard Lavoisier, B\u00e2timents K et Db - 49045 Angers CEDEX 01 - France)","MOLTECH-ANJOU-CIMI - MOLTECH-Anjou - \u00c9quipe CIMI (MOLTECH-Anjou UMR 6200 CNRS - Universit\u00e9 d'Angers, Facult\u00e9 des Sciences, 2 Boulevard Lavoisier, B\u00e2timents K et Db - 49045 Angers CEDEX 01 - France)"],"raw_orcid":"https://orcid.org/0000-0003-4259-3257","affiliations":[{"raw_affiliation_string":"Univ Angers, CNRS, MOLTECH-ANJOU, SFR MATRIX, 49000, Angers, France. thomas.cauchy@univ-angers.fr","institution_ids":["https://openalex.org/I4210137122","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"MOLTECH-Anjou (MOLTECH-Anjou UMR 6200 CNRS - Universit\u00e9 d'Angers, Facult\u00e9 des Sciences, 2 Boulevard Lavoisier, B\u00e2timents K et Db - 49045 Angers CEDEX 01 - France)","institution_ids":["https://openalex.org/I4210137122","https://openalex.org/I49451733","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"MOLTECH-ANJOU-CIMI - MOLTECH-Anjou - \u00c9quipe CIMI (MOLTECH-Anjou UMR 6200 CNRS - Universit\u00e9 d'Angers, Facult\u00e9 des Sciences, 2 Boulevard Lavoisier, B\u00e2timents K et Db - 49045 Angers CEDEX 01 - France)","institution_ids":["https://openalex.org/I4210137122","https://openalex.org/I49451733","https://openalex.org/I1294671590"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018380335","display_name":"Benoit Da Mota","orcid":"https://orcid.org/0000-0003-0807-8892"},"institutions":[{"id":"https://openalex.org/I49451733","display_name":"Universit\u00e9 d'Angers","ror":"https://ror.org/04yrqp957","country_code":"FR","type":"education","lineage":["https://openalex.org/I49451733"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Benoit Da\u00a0Mota","raw_affiliation_strings":["Univ Angers, LERIA, SFR MATHSTIC, 49000, Angers, France. benoit.damota@univ-angers.fr","LERIA - Laboratoire d'Etudes et de Recherche en Informatique d'Angers (Universit\u00e9 d'Angers, LERIA - Facult\u00e9s des Sciences, 2 Boulevard Lavoisier, B\u00e2timents G et H - 49045 Angers CEDEX 01 - France)"],"raw_orcid":"https://orcid.org/0000-0003-0807-8892","affiliations":[{"raw_affiliation_string":"Univ Angers, LERIA, SFR MATHSTIC, 49000, Angers, France. benoit.damota@univ-angers.fr","institution_ids":["https://openalex.org/I49451733"]},{"raw_affiliation_string":"LERIA - Laboratoire d'Etudes et de Recherche en Informatique d'Angers (Universit\u00e9 d'Angers, LERIA - Facult\u00e9s des Sciences, 2 Boulevard Lavoisier, B\u00e2timents G et H - 49045 Angers CEDEX 01 - France)","institution_ids":["https://openalex.org/I49451733"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018371449"],"corresponding_institution_ids":["https://openalex.org/I49451733"],"apc_list":{"value":1290,"currency":"GBP","value_usd":1582},"apc_paid":{"value":1290,"currency":"GBP","value_usd":1582},"fwci":1.3627,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.83631553,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"13","issue":"1","first_page":"76","last_page":"76"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11407","display_name":"Innovative Microfluidic and Catalytic Techniques Innovation","score":0.9692999720573425,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7445836067199707},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7206699252128601},{"id":"https://openalex.org/keywords/generalizability-theory","display_name":"Generalizability theory","score":0.5782527923583984},{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.5723483562469482},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.5443576574325562},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5320761203765869},{"id":"https://openalex.org/keywords/chemical-space","display_name":"Chemical space","score":0.5162915587425232},{"id":"https://openalex.org/keywords/quantum-chemical","display_name":"Quantum chemical","score":0.5141480565071106},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.4768187403678894},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.43439170718193054},{"id":"https://openalex.org/keywords/molecule","display_name":"Molecule","score":0.2666166126728058},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.20649278163909912},{"id":"https://openalex.org/keywords/bioinformatics","display_name":"Bioinformatics","score":0.1676240861415863},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14943331480026245},{"id":"https://openalex.org/keywords/drug-discovery","display_name":"Drug discovery","score":0.14789330959320068},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.12589755654335022},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09091389179229736},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.08090662956237793}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7445836067199707},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7206699252128601},{"id":"https://openalex.org/C27158222","wikidata":"https://www.wikidata.org/wiki/Q5532422","display_name":"Generalizability theory","level":2,"score":0.5782527923583984},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.5723483562469482},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.5443576574325562},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5320761203765869},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.5162915587425232},{"id":"https://openalex.org/C2991951333","wikidata":"https://www.wikidata.org/wiki/Q188403","display_name":"Quantum chemical","level":3,"score":0.5141480565071106},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.4768187403678894},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.43439170718193054},{"id":"https://openalex.org/C32909587","wikidata":"https://www.wikidata.org/wiki/Q11369","display_name":"Molecule","level":2,"score":0.2666166126728058},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.20649278163909912},{"id":"https://openalex.org/C60644358","wikidata":"https://www.wikidata.org/wiki/Q128570","display_name":"Bioinformatics","level":1,"score":0.1676240861415863},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14943331480026245},{"id":"https://openalex.org/C74187038","wikidata":"https://www.wikidata.org/wiki/Q1418791","display_name":"Drug discovery","level":2,"score":0.14789330959320068},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.12589755654335022},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09091389179229736},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.08090662956237793},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1186/s13321-021-00554-8","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-021-00554-8","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00554-8","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},{"id":"pmid:34600576","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34600576","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of cheminformatics","raw_type":null},{"id":"pmh:oai:HAL:hal-04033836v1","is_oa":false,"landing_page_url":"https://univ-angers.hal.science/hal-04033836","pdf_url":null,"source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, 2021, 13 (1), pp.76. &#x27E8;10.1186/s13321-021-00554-8&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:doaj.org/article:5ee95c78f5cb4002b6b284051ae3f53b","is_oa":true,"landing_page_url":"https://doaj.org/article/5ee95c78f5cb4002b6b284051ae3f53b","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Cheminformatics, Vol 13, Iss 1, Pp 1-17 (2021)","raw_type":"article"},{"id":"pmh:oai:europepmc.org:7381425","is_oa":true,"landing_page_url":"http://europepmc.org/pmc/articles/PMC8487551","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:8487551","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/8487551","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"J Cheminform","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1186/s13321-021-00554-8","is_oa":true,"landing_page_url":"https://doi.org/10.1186/s13321-021-00554-8","pdf_url":"https://jcheminf.biomedcentral.com/track/pdf/10.1186/s13321-021-00554-8","source":{"id":"https://openalex.org/S180838163","display_name":"Journal of Cheminformatics","issn_l":"1758-2946","issn":["1758-2946"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320256","host_organization_name":"BioMed Central","host_organization_lineage":["https://openalex.org/P4310320256","https://openalex.org/P4310319965"],"host_organization_lineage_names":["BioMed Central","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Cheminformatics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.41999998688697815}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322890","display_name":"Minist\u00e8re de l'Enseignement Sup\u00e9rieur et de la Recherche","ror":"https://ror.org/03sjk9a61"},{"id":"https://openalex.org/F4320325007","display_name":"Universite Angers","ror":"https://ror.org/04yrqp957"},{"id":"https://openalex.org/F4320327974","display_name":"Conseil R\u00e9gional des Pays de la Loire","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3175270449.pdf","grobid_xml":"https://content.openalex.org/works/W3175270449.grobid-xml"},"referenced_works_count":56,"referenced_works":["https://openalex.org/W1965092590","https://openalex.org/W1965555277","https://openalex.org/W1981622697","https://openalex.org/W1988037271","https://openalex.org/W2001883493","https://openalex.org/W2009046773","https://openalex.org/W2022476850","https://openalex.org/W2028326521","https://openalex.org/W2032558547","https://openalex.org/W2034549041","https://openalex.org/W2038617772","https://openalex.org/W2045270356","https://openalex.org/W2060531713","https://openalex.org/W2080635178","https://openalex.org/W2127638407","https://openalex.org/W2142863519","https://openalex.org/W2160592148","https://openalex.org/W2162320224","https://openalex.org/W2206840988","https://openalex.org/W2478294658","https://openalex.org/W2548357532","https://openalex.org/W2551556495","https://openalex.org/W2561728683","https://openalex.org/W2605908560","https://openalex.org/W2613900957","https://openalex.org/W2622206241","https://openalex.org/W2802872171","https://openalex.org/W2810220676","https://openalex.org/W2887447356","https://openalex.org/W2898907833","https://openalex.org/W2900694120","https://openalex.org/W2901679103","https://openalex.org/W2905012389","https://openalex.org/W2921533983","https://openalex.org/W2950708653","https://openalex.org/W2953128081","https://openalex.org/W2955543893","https://openalex.org/W2961960358","https://openalex.org/W2962764565","https://openalex.org/W2983997668","https://openalex.org/W2988203096","https://openalex.org/W2997181101","https://openalex.org/W3005441379","https://openalex.org/W3036993656","https://openalex.org/W3085315008","https://openalex.org/W3087570960","https://openalex.org/W3103098434","https://openalex.org/W3104956673","https://openalex.org/W3106761016","https://openalex.org/W3112587790","https://openalex.org/W3116865743","https://openalex.org/W3185028434","https://openalex.org/W4233924556","https://openalex.org/W4252028749","https://openalex.org/W4285719527","https://openalex.org/W6610423178"],"related_works":["https://openalex.org/W2118717649","https://openalex.org/W2413243053","https://openalex.org/W410723623","https://openalex.org/W2015341305","https://openalex.org/W2035068594","https://openalex.org/W4225593417","https://openalex.org/W2573498121","https://openalex.org/W2022843255","https://openalex.org/W4387527744","https://openalex.org/W3006234984"],"abstract_inverted_index":{"Chemical":[0],"diversity":[1,102,109,139,157,210],"is":[2,17,35,203],"one":[3],"of":[4,26,89,107,120,134,140,167,215,224],"the":[5,33,44,108,118,132,138,149,156,173,188,213,218],"key":[6],"term":[7],"when":[8],"dealing":[9],"with":[10,100,180],"machine":[11],"learning":[12],"and":[13,76,96,113],"molecular":[14,115,196],"generation.":[15],"This":[16],"particularly":[18],"true":[19],"for":[20,144],"quantum":[21,47,86],"chemical":[22,48,52,81,87],"datasets.":[23,175],"The":[24,176],"composition":[25],"which":[27],"should":[28],"be":[29],"done":[30],"meticulously":[31],"since":[32],"calculation":[34],"highly":[36],"time":[37],"demanding.":[38],"Previously":[39],"we":[40,68,152],"have":[41],"seen":[42],"that":[43,93,169],"most":[45],"known":[46],"dataset":[49,88],"QM9":[50,61,95],"lacks":[51],"diversity.":[53],"As":[54],"a":[55,74,84,101,135,141,165,184,199,208,222],"consequence,":[56],"ML":[57],"models":[58],"trained":[59],"on":[60,111,117],"showed":[62],"generalizability":[63],"shortcomings.":[64],"In":[65,148],"this":[66],"paper":[67],"would":[69,170],"like":[70],"to":[71,79,129,137,161,183,194,217],"present":[72],"(i)":[73],"fast":[75],"generic":[77],"method":[78],"evaluate":[80],"diversity,":[82],"(ii)":[83],"new":[85,97],"435k":[90],"molecules,":[91],"OD9,":[92],"includes":[94],"molecules":[98,168],"generated":[99],"objective,":[103],"(iii)":[104],"an":[105],"analysis":[106],"impact":[110,133],"unconstrained":[112],"goal-directed":[114,195],"generation":[116],"example":[119],"QED":[121,201],"optimization.":[122],"Our":[123],"innovative":[124],"approach":[125],"makes":[126],"it":[127],"possible":[128],"individually":[130],"estimate":[131],"solution":[136],"set,":[142],"allowing":[143],"effective":[145],"incremental":[146],"evaluation.":[147],"first":[150],"application,":[151],"will":[153],"see":[154],"how":[155],"constraint":[158],"allows":[159],"us":[160],"generate":[162],"more":[163],"than":[164],"million":[166],"efficiently":[171],"complete":[172],"reference":[174],"compounds":[177],"were":[178],"calculated":[179],"DFT":[181],"thanks":[182],"collaborative":[185],"effort":[186],"through":[187],"QuChemPedIA@home":[189],"BOINC":[190],"project.":[191],"With":[192],"regard":[193],"generation,":[197],"getting":[198],"high":[200],"score":[202],"not":[204],"complicated,":[205],"but":[206],"adding":[207],"little":[209],"can":[211],"cut":[212],"number":[214],"calls":[216],"evaluation":[219],"function":[220],"by":[221],"factor":[223],"ten.":[225]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
