{"id":"https://openalex.org/W7151520441","doi":"https://doi.org/10.48550/arxiv.2604.03476","title":"Fine-tuning DeepSeek-OCR-2 for Molecular Structure Recognition","display_name":"Fine-tuning DeepSeek-OCR-2 for Molecular Structure Recognition","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7151520441","doi":"https://doi.org/10.48550/arxiv.2604.03476"},"language":"en","primary_location":{"id":"pmh:oai:pubmedcentral.nih.gov:13131839","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC13131839/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ArXiv","raw_type":"Text"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC13131839/","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133137633","display_name":"Haocheng Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Haocheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133074425","display_name":"Xingyu Dang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dang, 
Xingyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133131391","display_name":"Junmei Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Junmei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6014999747276306,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.6014999747276306,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.03880000114440918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical 
Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.03849999979138374,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pubchem","display_name":"PubChem","score":0.6416000127792358},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6049000024795532},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5033000111579895},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4828000068664551},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.44339999556541443},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38609999418258667},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.32089999318122864},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.30559998750686646}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.673799991607666},{"id":"https://openalex.org/C158180186","wikidata":"https://www.wikidata.org/wiki/Q278487","display_name":"PubChem","level":2,"score":0.6416000127792358},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6049000024795532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial 
intelligence","level":1,"score":0.6001999974250793},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5033000111579895},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4828000068664551},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.44339999556541443},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38609999418258667},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3264000117778778},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32089999318122864},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.30559998750686646},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C68762167","wikidata":"https://www.wikidata.org/wiki/Q910164","display_name":"Cheminformatics","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C179303850","wikidata":"https://www.wikidata.org/wiki/Q4300362","display_name":"Molecular 
recognition","level":3,"score":0.28940001130104065},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.272599995136261},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C99726746","wikidata":"https://www.wikidata.org/wiki/Q906396","display_name":"Chemical space","level":3,"score":0.25609999895095825}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:pubmedcentral.nih.gov:13131839","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC13131839/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ArXiv","raw_type":"Text"},{"id":"doi:10.48550/arxiv.2604.03476","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03476","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell 
University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:pubmedcentral.nih.gov:13131839","is_oa":true,"landing_page_url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC13131839/","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ArXiv","raw_type":"Text"},"sustainable_development_goals":[{"score":0.5376380681991577,"display_name":"Quality 
Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Optical":[0],"Chemical":[1],"Structure":[2],"Recognition":[3],"(OCSR)":[4],"is":[5],"critical":[6],"for":[7,48,154],"converting":[8],"2D":[9],"molecular":[10,49],"diagrams":[11],"from":[12,97,103],"printed":[13],"literature":[14],"into":[15],"machine-readable":[16],"formats.":[17],"While":[18],"Vision-Language":[19],"Models":[20],"have":[21],"shown":[22],"promise":[23],"in":[24],"end-to-end":[25],"OCR":[26],"tasks,":[27],"their":[28],"direct":[29,36],"application":[30],"to":[31,78,105,122,131,147],"OCSR":[32],"remains":[33,129],"challenging,":[34],"and":[35,76,99,108,140],"full-parameter":[37,80],"supervised":[38,69],"fine-tuning":[39,70,81],"often":[40],"fails.":[41],"In":[42],"this":[43],"work,":[44],"we":[45,64,136],"adapt":[46],"DeepSeek-OCR-2":[47],"optical":[50],"recognition":[51],"by":[52],"formulating":[53],"the":[54,123,149],"task":[55],"as":[56],"image-conditioned":[57],"SMILES":[58,156],"generation.":[59],"To":[60],"overcome":[61],"training":[62],"instabilities,":[63],"propose":[65],"a":[66,91],"two-stage":[67],"progressive":[68],"strategy:":[71],"starting":[72],"with":[73,82],"parameter-efficient":[74],"LoRA":[75],"transitioning":[77],"selective":[79],"split":[83],"learning":[84],"rates.":[85],"We":[86],"train":[87],"our":[88],"model":[89],"on":[90],"large-scale":[92],"corpus":[93],"combining":[94],"synthetic":[95],"renderings":[96],"PubChem":[98],"realistic":[100],"patent":[101],"images":[102],"USPTO-MOL":[104],"improve":[106,148],"coverage":[107],"robustness.":[109],"Our":[110],"fine-tuned":[111],"model,":[112],"MolSeek-OCR,":[113],"demonstrates":[114],"competitive":[115],"capabilities,":[116],"achieving":[117],"exact":[118,155],"matching":[119],"accuracies":[120],"comparable":[121],"best-performing":[124],"image-to-se
quence":[125],"model.":[126],"However,":[127],"it":[128],"inferior":[130],"state-of-the-art":[132],"image-to-graph":[133],"models.":[134],"Furthermore,":[135],"explore":[137],"reinforcement-style":[138],"post-training":[139],"data-curation-based":[141],"refinement,":[142],"finding":[143],"that":[144],"they":[145],"fail":[146],"strict":[150],"sequence-level":[151],"fidelity":[152],"required":[153],"matching.":[157]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-08T00:00:00"}
