{"id":"https://openalex.org/W4386644424","doi":"https://doi.org/10.48550/arxiv.2309.04589","title":"Motif-aware Attribute Masking for Molecular Graph Pre-training","display_name":"Motif-aware Attribute Masking for Molecular Graph Pre-training","publication_year":2023,"publication_date":"2023-09-08","ids":{"openalex":"https://openalex.org/W4386644424","doi":"https://doi.org/10.48550/arxiv.2309.04589"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.04589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.04589","pdf_url":"https://arxiv.org/pdf/2309.04589","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.04589","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052023514","display_name":"Eric Inae","orcid":"https://orcid.org/0009-0002-2101-2126"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Inae, Eric","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100739873","display_name":"Gang Liu","orcid":"https://orcid.org/0000-0003-4204-731X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Gang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5074821819","display_name":"Meng Jiang","orcid":"https://orcid.org/0000-0002-3009-519X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Meng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052023514"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7171826958656311},{"id":"https://openalex.org/keywords/motif","display_name":"Motif (music)","score":0.6183726787567139},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5253861546516418},{"id":"https://openalex.org/keywords/disjoint-sets","display_name":"Disjoint sets","score":0.518439769744873},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4765758514404297},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.43945345282554626},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.41763490438461304},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3665868639945984},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3637159764766693},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33458244800567627},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12704426050186157},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.11473262310028076}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7171826958656311},{"id":"https://openalex.org/C32276052","wikidata":"https://www.wikidata.org/wiki/Q908349","display_name":"Motif (music)","level":2,"score":0.6183726787567139},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5253861546516418},{"id":"https://openalex.org/C45340560","wikidata":"https://www.wikidata.org/wiki/Q215382","display_name":"Disjoint sets","level":2,"score":0.518439769744873},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4765758514404297},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.43945345282554626},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.41763490438461304},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3665868639945984},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3637159764766693},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33458244800567627},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12704426050186157},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.11473262310028076},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.04589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.04589","pdf_url":"https://arxiv.org/pdf/2309.04589","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2309.04589","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2309.04589","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.04589","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.04589","pdf_url":"https://arxiv.org/pdf/2309.04589","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1272117193","display_name":null,"funder_award_id":"2146761","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1709352092","display_name":"CAREER: Synergistic Approaches for Specialized Intelligent Assistance","funder_award_id":"2142827","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1824154665","display_name":null,"funder_award_id":"CBET-2332270","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2567596830","display_name":null,"funder_award_id":"214282","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3473239542","display_name":null,"funder_award_id":"2332270","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4720003262","display_name":null,"funder_award_id":"N00014-22","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G4776870722","display_name":null,"funder_award_id":"unknown","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G495881888","display_name":null,"funder_award_id":"N00014-22-1-2507","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"},{"id":"https://openalex.org/G6436526700","display_name":"III: Small: Intelligent Scientific Text Analytics with Knowledge-Augmented Abductive Reasoning","funder_award_id":"2234058","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"},{"id":"https://openalex.org/F4320337390","display_name":"Division of Chemical, Bioengineering, Environmental, and Transport Systems","ror":"https://ror.org/0471zv972"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386644424.pdf","grobid_xml":"https://content.openalex.org/works/W4386644424.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3170299350","https://openalex.org/W2368410102","https://openalex.org/W2368037387","https://openalex.org/W190186656","https://openalex.org/W2902352756","https://openalex.org/W2377079823","https://openalex.org/W4256429076","https://openalex.org/W2605676258","https://openalex.org/W2599962286","https://openalex.org/W2319582300"],"abstract_inverted_index":{"Attribute":[0],"reconstruction":[1],"is":[2,30,140],"used":[3],"to":[4,25,52,72,124],"predict":[5],"node":[6,149,166],"or":[7,109],"edge":[8],"features":[9,146,163],"in":[10,40,89,134],"the":[11,57,63,69,79,95,103,106,130,145,161,168],"pre-training":[12],"of":[13,21,59,65,132,164],"graph":[14,139,157],"neural":[15],"networks.":[16],"Given":[17],"a":[18,90,151],"large":[19],"number":[20],"molecules,":[22],"they":[23],"learn":[24,73,82,100],"capture":[26,125],"structural":[27],"knowledge,":[28],"which":[29],"transferable":[31],"for":[32,147,170],"various":[33],"downstream":[34],"property":[35,179],"prediction":[36,180],"tasks":[37],"and":[38,43,118,182],"vital":[39],"chemistry,":[41],"biomedicine,":[42],"material":[44],"science.":[45],"Previous":[46],"strategies":[47,123],"that":[48],"randomly":[49],"select":[50],"nodes":[51],"do":[53],"attribute":[54,121],"masking":[55,122],"leverage":[56],"information":[58,131],"local":[60],"neighbors":[61,67],"However,":[62],"over-reliance":[64],"these":[66],"inhibits":[68],"model's":[70],"ability":[71],"from":[74,84,102],"higher-level":[75],"substructures.":[76],"For":[77],"example,":[78],"model":[80],"would":[81],"little":[83],"predicting":[85],"three":[86,97],"carbon":[87],"atoms":[88,133],"benzene":[91],"ring":[92],"based":[93],"on":[94,176],"other":[96],"but":[98],"could":[99],"more":[101],"inter-connections":[104],"between":[105],"functional":[107],"groups,":[108],"called":[110],"chemical":[111],"motifs.":[112,136],"In":[113],"this":[114],"work,":[115],"we":[116],"propose":[117],"investigate":[119],"motif-aware":[120],"inter-motif":[126],"structures":[127],"by":[128],"leveraging":[129],"neighboring":[135],"Once":[137],"each":[138,165],"decomposed":[141],"into":[142],"disjoint":[143],"motifs,":[144],"every":[148],"within":[150,167],"sample":[152],"motif":[153,169],"are":[154],"masked.":[155],"The":[156],"decoder":[158],"then":[159],"predicts":[160],"masked":[162],"reconstruction.":[171],"We":[172],"evaluate":[173],"our":[174],"approach":[175],"eight":[177],"molecular":[178],"datasets":[181],"demonstrate":[183],"its":[184],"advantages.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
