{"id":"https://openalex.org/W4296686673","doi":"https://doi.org/10.1021/acs.jcim.2c00588","title":"Self-Supervised Molecular Pretraining Strategy for Low-Resource Reaction Prediction Scenarios","display_name":"Self-Supervised Molecular Pretraining Strategy for Low-Resource Reaction Prediction Scenarios","publication_year":2022,"publication_date":"2022-09-21","ids":{"openalex":"https://openalex.org/W4296686673","doi":"https://doi.org/10.1021/acs.jcim.2c00588","pmid":"https://pubmed.ncbi.nlm.nih.gov/36129104"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.2c00588","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.2c00588","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034565668","display_name":"Zhipeng Wu","orcid":"https://orcid.org/0000-0003-3535-1081"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhipeng Wu","raw_affiliation_strings":["Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China"],"raw_orcid":"https://orcid.org/0000-0003-3535-1081","affiliations":[{"raw_affiliation_string":"Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102707561","display_name":"Xiang Cai","orcid":"https://orcid.org/0000-0001-7265-6867"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiang Cai","raw_affiliation_strings":["PyWise Biotech, Suzhou 215000, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PyWise Biotech, Suzhou 215000, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102819904","display_name":"Chengyun Zhang","orcid":"https://orcid.org/0000-0002-8499-5273"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengyun Zhang","raw_affiliation_strings":["Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028784084","display_name":"Haoran Qiao","orcid":"https://orcid.org/0000-0001-8483-6566"},"institutions":[{"id":"https://openalex.org/I23632641","display_name":"Shanghai University of Electric Power","ror":"https://ror.org/02w4tny03","country_code":"CN","type":"education","lineage":["https://openalex.org/I23632641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoran Qiao","raw_affiliation_strings":["College of Mathematics and Physics, Shanghai University of Electric Power, Shanghai 201203, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Mathematics and Physics, Shanghai University of Electric Power, Shanghai 201203, P. R. China","institution_ids":["https://openalex.org/I23632641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007801562","display_name":"Yejian Wu","orcid":"https://orcid.org/0000-0002-9581-2479"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yejian Wu","raw_affiliation_strings":["Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100356844","display_name":"Yun Zhang","orcid":"https://orcid.org/0000-0003-3353-4650"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yun Zhang","raw_affiliation_strings":["Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005102596","display_name":"Xinqiao Wang","orcid":"https://orcid.org/0000-0001-6071-832X"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinqiao Wang","raw_affiliation_strings":["Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China"],"raw_orcid":"https://orcid.org/0000-0001-6071-832X","affiliations":[{"raw_affiliation_string":"Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China","institution_ids":["https://openalex.org/I55712492"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haiying Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haiying Xie","raw_affiliation_strings":["PUROTON Gene Medical Institute Co., Ltd., Chongqing 400700, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PUROTON Gene Medical Institute Co., Ltd., Chongqing 400700, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100700376","display_name":"Feng Luo","orcid":"https://orcid.org/0000-0002-1410-1705"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng Luo","raw_affiliation_strings":["PUROTON Gene Medical Institute Co., Ltd., Chongqing 400700, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PUROTON Gene Medical Institute Co., Ltd., Chongqing 400700, P. R. China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017761454","display_name":"Hongliang Duan","orcid":"https://orcid.org/0000-0002-9194-0115"},"institutions":[{"id":"https://openalex.org/I55712492","display_name":"Zhejiang University of Technology","ror":"https://ror.org/02djqfd08","country_code":"CN","type":"education","lineage":["https://openalex.org/I55712492"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongliang Duan","raw_affiliation_strings":["Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Artificial Intelligence Aided Drug Discovery Institute, College of Pharmaceutical Sciences, Zhejiang University of Technology, Hangzhou 310014, P. R. China","institution_ids":["https://openalex.org/I55712492"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5017761454"],"corresponding_institution_ids":["https://openalex.org/I55712492"],"apc_list":null,"apc_paid":null,"fwci":0.6783,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.63099905,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"62","issue":"19","first_page":"4579","last_page":"4590"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5392159223556519},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5050681233406067},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.46097293496131897},{"id":"https://openalex.org/keywords/reaction-conditions","display_name":"Reaction conditions","score":0.437987357378006},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.42861804366111755},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4261068105697632},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4251987040042877},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3744126856327057},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.3199794888496399},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1300121545791626},{"id":"https://openalex.org/keywords/organic-chemistry","display_name":"Organic chemistry","score":0.1050214171409607},{"id":"https://openalex.org/keywords/catalysis","display_name":"Catalysis","score":0.09851023554801941}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5392159223556519},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5050681233406067},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.46097293496131897},{"id":"https://openalex.org/C2991944050","wikidata":"https://www.wikidata.org/wiki/Q36534","display_name":"Reaction conditions","level":3,"score":0.437987357378006},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.42861804366111755},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4261068105697632},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4251987040042877},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3744126856327057},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.3199794888496399},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1300121545791626},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.1050214171409607},{"id":"https://openalex.org/C161790260","wikidata":"https://www.wikidata.org/wiki/Q82264","display_name":"Catalysis","level":2,"score":0.09851023554801941},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1021/acs.jcim.2c00588","is_oa":false,"landing_page_url":"https://doi.org/10.1021/acs.jcim.2c00588","pdf_url":null,"source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:36129104","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36129104","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G1774537051","display_name":null,"funder_award_id":"81903438","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7984182621","display_name":null,"funder_award_id":"LD22H300004","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1757990252","https://openalex.org/W1975147762","https://openalex.org/W1976245763","https://openalex.org/W2033834272","https://openalex.org/W2046747992","https://openalex.org/W2105966118","https://openalex.org/W2159668990","https://openalex.org/W2165698076","https://openalex.org/W2194775991","https://openalex.org/W2242932006","https://openalex.org/W2512660405","https://openalex.org/W2606363443","https://openalex.org/W2621742623","https://openalex.org/W2785942661","https://openalex.org/W2789366878","https://openalex.org/W2791657723","https://openalex.org/W2900090807","https://openalex.org/W2947423323","https://openalex.org/W2969507301","https://openalex.org/W2970764640","https://openalex.org/W2991508457","https://openalex.org/W2998659621","https://openalex.org/W3014689923","https://openalex.org/W3023986361","https://openalex.org/W3038856956","https://openalex.org/W3042826782","https://openalex.org/W3088265803","https://openalex.org/W3088999551","https://openalex.org/W3092557781","https://openalex.org/W3094771832","https://openalex.org/W3100545487","https://openalex.org/W3103092523","https://openalex.org/W3117879109","https://openalex.org/W3215885522","https://openalex.org/W4221074165","https://openalex.org/W4248083651"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4224009465","https://openalex.org/W4286629047","https://openalex.org/W4306321456","https://openalex.org/W4285260836","https://openalex.org/W3046775127","https://openalex.org/W4205958290"],"abstract_inverted_index":{"In":[0],"the":[1,30,35,54,64,73,78,90],"face":[2],"of":[3,38,57,77],"low-resource":[4,109],"reaction":[5,16,49,65,100,110],"training":[6],"samples,":[7],"we":[8,60,70],"construct":[9],"a":[10,20,47],"chemical":[11,36],"platform":[12],"for":[13],"addressing":[14],"small-scale":[15,48],"prediction":[17],"problems.":[18],"Using":[19],"self-supervised":[21],"pretraining":[22],"strategy":[23],"called":[24],"MAsked":[25],"Sequence":[26,28],"to":[27,108],"(MASS),":[29],"Transformer":[31,79],"model":[32,80],"can":[33,81],"absorb":[34],"information":[37],"about":[39],"1":[40],"billion":[41],"molecules":[42],"and":[43,86,96],"then":[44],"fine-tune":[45],"on":[46],"prediction.":[50,111],"To":[51],"further":[52],"strengthen":[53],"predictive":[55],"performance":[56],"our":[58],"model,":[59],"combine":[61],"MASS":[62],"with":[63],"transfer":[66],"learning":[67],"strategy.":[68],"Here,":[69],"show":[71],"that":[72],"average":[74],"improved":[75],"accuracies":[76],"reach":[82],"14.07,":[83],"24.26,":[84],"40.31,":[85],"57.69%":[87],"in":[88],"predicting":[89],"Baeyer-Villiger,":[91],"Heck,":[92],"C-C":[93],"bond":[94],"formation,":[95],"functional":[97],"group":[98],"interconversion":[99],"data":[101],"sets,":[102],"respectively,":[103],"marking":[104],"an":[105],"important":[106],"step":[107]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-29T09:21:14.243279","created_date":"2025-10-10T00:00:00"}
