{"id":"https://openalex.org/W7130723623","doi":"https://doi.org/10.48550/arxiv.2602.17602","title":"MolHIT: Advancing Molecular-Graph Generation with Hierarchical Discrete Diffusion Models","display_name":"MolHIT: Advancing Molecular-Graph Generation with Hierarchical Discrete Diffusion Models","publication_year":2026,"publication_date":"2026-02-19","ids":{"openalex":"https://openalex.org/W7130723623","doi":"https://doi.org/10.48550/arxiv.2602.17602"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.17602","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126472239","display_name":"Hojung Jung","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jung, Hojung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024797101","display_name":"Rodrigo Horm\u00e1zabal","orcid":"https://orcid.org/0000-0003-3744-5401"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hormazabal, Rodrigo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126449441","display_name":"Jaehyeong Jo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jo, Jaehyeong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126511110","display_name":"Youngrok Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Youngrok","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126448163","display_name":"Kyunggeun Roh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roh, Kyunggeun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091674853","display_name":"Se-Young Yun","orcid":"https://orcid.org/0000-0001-6675-5113"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yun, Se-Young","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126475659","display_name":"Sehui Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Sehui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126472004","display_name":"Dae-Woong Jeong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jeong, Dae-Woong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5126472239"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6485000252723694,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6485000252723694,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.22619999945163727,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.058800000697374344,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5590000152587891},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5181999802589417},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4982999861240387},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4729999899864197},{"id":"https://openalex.org/keywords/graph-theory","display_name":"Graph theory","score":0.40139999985694885},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.37630000710487366}],"concepts":[{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5590000152587891},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5472000241279602},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5181999802589417},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4982999861240387},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4729999899864197},{"id":"https://openalex.org/C88230418","wikidata":"https://www.wikidata.org/wiki/Q131476","display_name":"Graph theory","level":2,"score":0.40139999985694885},{"id":"https://openalex.org/C186060115","wikidata":"https://www.wikidata.org/wiki/Q30336093","display_name":"Biological system","level":1,"score":0.3993000090122223},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3950999975204468},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3873000144958496},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.3463999927043915},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.34209999442100525},{"id":"https://openalex.org/C124223222","wikidata":"https://www.wikidata.org/wiki/Q2281940","display_name":"Chemical process","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C58312451","wikidata":"https://www.wikidata.org/wiki/Q4817200","display_name":"Atom (system on chip)","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28060001134872437},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.26669999957084656}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.17602","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.17602","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.17602","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.17602","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Molecular":[0],"generation":[1,63,145],"with":[2,117],"diffusion":[3,20,85],"models":[4,21,36],"has":[5],"emerged":[6],"as":[7],"a":[8,59],"promising":[9],"direction":[10],"for":[11,120],"AI-driven":[12],"drug":[13],"discovery":[14],"and":[15,42,93,146],"materials":[16],"science.":[17],"While":[18],"graph":[19,62,125],"have":[22],"been":[23],"widely":[24],"adopted":[25],"due":[26],"to":[27,44,50,86,103],"the":[28,46,77,99,114,121],"discrete":[29,84],"nature":[30],"of":[31],"2D":[32],"molecular":[33,61],"graphs,":[34],"existing":[35,71],"suffer":[37],"from":[38],"low":[39],"chemical":[40,91,105],"validity":[41,119],"struggle":[43],"meet":[45],"desired":[47],"properties":[48],"compared":[49],"1D":[51,129],"modeling.":[52],"In":[53],"this":[54],"work,":[55],"we":[56],"introduce":[57],"MolHIT,":[58],"powerful":[60],"framework":[64],"that":[65,89,97],"overcomes":[66],"long-standing":[67],"performance":[68,112,138],"limitations":[69],"in":[70,124,139],"methods.":[72],"MolHIT":[73,108],"is":[74],"based":[75],"on":[76,113],"Hierarchical":[78],"Discrete":[79],"Diffusion":[80],"Model,":[81],"which":[82],"generalizes":[83],"additional":[87],"categories":[88],"encode":[90],"priors,":[92],"decoupled":[94],"atom":[95,100],"encoding":[96],"splits":[98],"types":[101],"according":[102],"their":[104],"roles.":[106],"Overall,":[107],"achieves":[109],"new":[110],"state-of-the-art":[111],"MOSES":[115],"dataset":[116],"near-perfect":[118],"first":[122],"time":[123],"diffusion,":[126],"surpassing":[127],"strong":[128,137],"baselines":[130],"across":[131],"multiple":[132],"metrics.":[133],"We":[134],"further":[135],"demonstrate":[136],"downstream":[140],"tasks,":[141],"including":[142],"multi-property":[143],"guided":[144],"scaffold":[147],"extension.":[148]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-21T00:00:00"}
