{"id":"https://openalex.org/W7133331632","doi":"https://doi.org/10.48550/arxiv.2603.01840","title":"FireRed-OCR Technical Report","display_name":"FireRed-OCR Technical Report","publication_year":2026,"publication_date":"2026-03-02","ids":{"openalex":"https://openalex.org/W7133331632","doi":"https://doi.org/10.48550/arxiv.2603.01840"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.01840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.01840","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5127911811","display_name":"Hao Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119180908","display_name":"Haoran Lou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lou, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127928772","display_name":"Xinyue Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xinyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113115990","display_name":"Zuodong Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Zuodong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127992325","display_name":"Zhaojun Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Zhaojun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128009776","display_name":"Phellon Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Phellon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127905424","display_name":"Xuanhe Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xuanhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075556202","display_name":"Kai Zuo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuo, Kai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127927324","display_name":"Yibo Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Yibo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127941483","display_name":"Xu Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Xu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127990904","display_name":"Yao Hu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125106554","display_name":"Boxiang Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Boxiang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127977155","display_name":"Jian Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128010428","display_name":"Yongji Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Yongji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125427008","display_name":"Wenxin Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Wenxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109634508","display_name":"Yingmiao Liu","orcid":"https://orcid.org/0009-0004-9580-3726"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yingmiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128024480","display_name":"Yuhao Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Yuhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127971476","display_name":"Manjie Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Manjie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127906499","display_name":"Gang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Gang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125008852","display_name":"Yidong Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yidong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5127978879","display_name":"Zhichao Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Zhichao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5090906485","display_name":"Changhao Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Changhao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":22,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.29910001158714294,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.29910001158714294,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.2856000065803528,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07680000364780426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6588000059127808},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6212000250816345},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.5340999960899353},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.515999972820282},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.45210000872612},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.4505999982357025},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4366999864578247},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.40400001406669617}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6898000240325928},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6588000059127808},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6212000250816345},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.5340999960899353},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.515999972820282},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48919999599456787},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.45210000872612},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.4505999982357025},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4366999864578247},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.40400001406669617},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.3716000020503998},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3711000084877014},{"id":"https://openalex.org/C182449105","wikidata":"https://www.wikidata.org/wiki/Q3099732","display_name":"Technical report","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.36230000853538513},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3443000018596649},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32820001244544983},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3102000057697296},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.2953999936580658},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2750000059604645},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27459999918937683},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.01840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.01840","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.01840","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.6734907627105713}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,209],"present":[1],"FireRed-OCR,":[2,45],"a":[3,46,73,96,111],"systematic":[4],"framework":[5,48],"to":[6,50,92,123,133,161,216,221],"specialize":[7],"general":[8,21],"VLMs":[9,53],"into":[10,57],"high-performance":[11],"OCR":[12,38],"models.":[13],"Large":[14],"Vision-Language":[15],"Models":[16],"(VLMs)":[17],"have":[18],"demonstrated":[19],"impressive":[20],"capabilities":[22],"but":[23],"frequently":[24],"suffer":[25],"from":[26,120],"``structural":[27],"hallucination''":[28],"when":[29],"processing":[30],"complex":[31],"documents,":[32],"limiting":[33],"their":[34],"utility":[35],"in":[36],"industrial":[37],"applications.":[39],"In":[40],"this":[41],"paper,":[42],"we":[43,71,109],"introduce":[44],"novel":[47],"designed":[49],"transform":[51],"general-purpose":[52],"(based":[54],"on":[55,176],"Qwen3-VL)":[56],"pixel-precise":[58],"structural":[59,167],"document":[60,106,139],"parsing":[61],"experts.":[62],"To":[63],"address":[64],"the":[65,118,135,218],"scarcity":[66],"of":[67,138,189],"high-quality":[68],"structured":[69],"data,":[70],"construct":[72],"``Geometry":[74],"+":[75],"Semantics''":[76],"Data":[77],"Factory.":[78],"Unlike":[79],"traditional":[80],"random":[81],"sampling,":[82],"our":[83,211],"pipeline":[84],"leverages":[85],"geometric":[86],"feature":[87],"clustering":[88],"and":[89,94,104,149,166,199,205,213],"multi-dimensional":[90],"tagging":[91],"synthesize":[93],"curate":[95],"highly":[97],"balanced":[98],"dataset,":[99],"effectively":[100],"handling":[101],"long-tail":[102],"layouts":[103],"rare":[105],"types.":[107],"Furthermore,":[108],"propose":[110],"Three-Stage":[112],"Progressive":[113],"Training":[114],"strategy":[115],"that":[116,180],"guides":[117],"model":[119,214],"pixel-level":[121],"perception":[122],"logical":[124],"structure":[125],"generation.":[126],"This":[127],"curriculum":[128],"includes:":[129],"(1)":[130],"Multi-task":[131],"Pre-alignment":[132],"ground":[134],"model's":[136],"understanding":[137],"structure;":[140],"(2)":[141],"Specialized":[142,222],"SFT":[143],"for":[144],"standardizing":[145],"full-image":[146],"Markdown":[147],"output;":[148],"(3)":[150],"Format-Constrained":[151],"Group":[152],"Relative":[153],"Policy":[154],"Optimization":[155],"(GRPO),":[156],"which":[157],"utilizes":[158],"reinforcement":[159],"learning":[160],"enforce":[162],"strict":[163],"syntactic":[164],"validity":[165],"integrity":[168],"(e.g.,":[169],"table":[170],"closure,":[171],"formula":[172],"syntax).":[173],"Extensive":[174],"evaluations":[175],"OmniDocBench":[177],"v1.5":[178],"demonstrate":[179],"FireRed-OCR":[181],"achieves":[182],"state-of-the-art":[183],"performance":[184],"with":[185],"an":[186],"overall":[187],"score":[188],"92.94\\%,":[190],"significantly":[191],"outperforming":[192],"strong":[193],"baselines":[194],"such":[195],"as":[196],"DeepSeek-OCR":[197],"2":[198],"OCRVerse":[200],"across":[201],"text,":[202],"formula,":[203],"table,":[204],"reading":[206],"order":[207],"metrics.":[208],"open-source":[210],"code":[212],"weights":[215],"facilitate":[217],"``General":[219],"VLM":[220],"Structural":[223],"Expert''":[224],"paradigm.":[225]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-04T00:00:00"}
