{"id":"https://openalex.org/W7138386904","doi":"https://doi.org/10.48550/arxiv.2603.15011","title":"Molecular Identifier Visual Prompt and Verifiable Reinforcement Learning for Chemical Reaction Diagram Parsing","display_name":"Molecular Identifier Visual Prompt and Verifiable Reinforcement Learning for Chemical Reaction Diagram Parsing","publication_year":2026,"publication_date":"2026-03-16","ids":{"openalex":"https://openalex.org/W7138386904","doi":"https://doi.org/10.48550/arxiv.2603.15011"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.15011","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15011","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.15011","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129708597","display_name":"Jiahe Song","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Song, Jiahe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129738831","display_name":"Chuang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Chuang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071329779","display_name":"Yinfan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yinfan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129734754","display_name":"Hao Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Hao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129645509","display_name":"Rui Nie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nie, Rui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129712623","display_name":"Bowen Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Bowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129703391","display_name":"Xingjian Wei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Xingjian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129745641","display_name":"Junyuan Gao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Junyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129712787","display_name":"Yubin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yubin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129708477","display_name":"Bin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129696732","display_name":"Lijun Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Lijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129695873","display_name":"Jiang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129740932","display_name":"Qian Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Qian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129711326","display_name":"Conghui He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Conghui","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":14,"corresponding_author_ids":["https://openalex.org/A5129708597"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9606000185012817,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9606000185012817,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.006599999964237213,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.0019000000320374966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.7490000128746033},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6051999926567078},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5957000255584717},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5218999981880188},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.5054000020027161},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.46299999952316284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7868000268936157},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.7490000128746033},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6051999926567078},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5957000255584717},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.560699999332428},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5218999981880188},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.5054000020027161},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.46299999952316284},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4287000000476837},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4189999997615814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3801000118255615},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32179999351501465},{"id":"https://openalex.org/C186399060","wikidata":"https://www.wikidata.org/wiki/Q959962","display_name":"Diagram","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.29269999265670776}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.15011","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15011","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.15011","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.15011","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7216868996620178,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reaction":[0],"diagram":[1,179],"parsing":[2],"(RxnDP)":[3],"is":[4,33],"critical":[5],"for":[6],"extracting":[7],"chemical":[8,42,98],"synthesis":[9],"information":[10],"from":[11,67],"literature.":[12],"Although":[13],"recent":[14],"Vision-Language":[15],"Models":[16],"(VLMs)":[17],"have":[18],"emerged":[19],"as":[20,80],"a":[21,126],"promising":[22],"paradigm":[23],"to":[24,39,95,116,134,161],"automate":[25],"this":[26,62],"complex":[27],"visual":[28,41],"reasoning":[29],"task,":[30],"their":[31],"application":[32],"fundamentally":[34],"bottlenecked":[35],"by":[36],"the":[37,48,97,150,172],"inability":[38],"align":[40],"entities":[43],"with":[44,158],"pre-trained":[45],"knowledge,":[46],"alongside":[47],"inherent":[49],"discrepancy":[50],"between":[51],"token-level":[52],"training":[53],"and":[54,73,108,166,174,186],"reaction-level":[55,137],"evaluation.":[56],"To":[57],"address":[58],"these":[59],"dual":[60],"challenges,":[61],"work":[63],"enhances":[64],"VLM-based":[65,177],"RxnDP":[66],"two":[68],"complementary":[69],"perspectives:":[70],"prompting":[71,113],"representation":[72],"learning":[74,128],"paradigms.":[75],"First,":[76],"we":[77,123,148],"propose":[78],"Identifier":[79],"Visual":[81],"Prompting":[82],"(IdtVP),":[83],"which":[84],"leverages":[85,131],"naturally":[86],"occurring":[87],"molecule":[88],"identifiers":[89],"(e.g.,":[90],"bold":[91],"numerals":[92],"like":[93],"1a)":[94],"activate":[96],"knowledge":[99],"acquired":[100],"during":[101],"VLM":[102],"pre-training.":[103],"IdtVP":[104],"enables":[105],"powerful":[106],"zero-shot":[107],"out-of-distribution":[109,167],"capabilities,":[110],"outperforming":[111],"existing":[112],"strategies.":[114],"Second,":[115],"further":[117],"optimize":[118,136],"performance":[119],"within":[120],"fine-tuning":[121],"paradigms,":[122],"introduce":[124],"Re3-DAPO,":[125],"reinforcement":[127],"algorithm":[129],"that":[130],"verifiable":[132],"rewards":[133],"directly":[135],"metrics,":[138],"thereby":[139],"achieving":[140],"consistent":[141],"gains":[142],"over":[143],"standard":[144],"supervised":[145],"fine-tuning.":[146],"Additionally,":[147],"release":[149,183],"ScannedRxn":[151],"benchmark,":[152],"comprising":[153],"scanned":[154],"historical":[155],"reaction":[156,178],"diagrams":[157],"real-world":[159],"artifacts,":[160],"rigorously":[162],"assess":[163],"model":[164],"robustness":[165],"ability.":[168],"Our":[169],"contributions":[170],"advance":[171],"accuracy":[173],"generalization":[175],"of":[176],"parsing.":[180],"We":[181],"will":[182],"data,":[184],"models,":[185],"code":[187],"on":[188],"GitHub.":[189]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
