{"id":"https://openalex.org/W7151400241","doi":"https://doi.org/10.48550/arxiv.2604.04771","title":"MinerU2.5-Pro: Pushing the Limits of Data-Centric Document Parsing at Scale","display_name":"MinerU2.5-Pro: Pushing the Limits of Data-Centric Document Parsing at Scale","publication_year":2026,"publication_date":"2026-04-06","ids":{"openalex":"https://openalex.org/W7151400241","doi":"https://doi.org/10.48550/arxiv.2604.04771"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.04771","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04771","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.04771","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133110275","display_name":"Bin Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133124857","display_name":"Tianyao He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Tianyao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104670548","display_name":"Linke Ouyang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ouyang, Linke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133079453","display_name":"Fan Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Fan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133103720","display_name":"Zhiyuan Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133131066","display_name":"Tao Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133100257","display_name":"Yuan Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qu, Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133131510","display_name":"Zhenjiang Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Zhenjiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133082205","display_name":"Weijun Zeng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Weijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133078717","display_name":"Ziyang Miao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miao, Ziyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101448143","display_name":"Boyi Xu","orcid":"https://orcid.org/0009-0008-2531-044X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Bangrui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085823891","display_name":"Junbo Niu","orcid":"https://orcid.org/0000-0002-2135-6853"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niu, Junbo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133098090","display_name":"Mengzhang Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Mengzhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133082793","display_name":"Jiantao Qiu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Jiantao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020050741","display_name":"Q. T. Zhang","orcid":"https://orcid.org/0000-0002-8153-8018"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qintong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133071712","display_name":"Dongsheng Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Dongsheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133109065","display_name":"Yuefeng Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Yuefeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133104670","display_name":"Hejun Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Hejun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133072873","display_name":"Wenzheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wenzheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133093784","display_name":"Jutao Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Jutao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054046165","display_name":"Jiayong Shi","orcid":"https://orcid.org/0000-0002-1403-7862"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Jiayong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133086636","display_name":"Pengyu Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Pengyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133114336","display_name":"Xiaomeng Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Xiaomeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069276737","display_name":"Huaping Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Huaping","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wei, Liqun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Liqun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133097687","display_name":"Jing Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133094902","display_name":"Jie Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133100478","display_name":"Wei Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072972030","display_name":"Shasha Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Shasha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082197282","display_name":"Q. D. Wu","orcid":"https://orcid.org/0009-0000-8013-8525"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Qianqian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133076451","display_name":"Xuanhe Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Xuanhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133103350","display_name":"Weijia Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Weijia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133105918","display_name":"Zhenxiang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhenxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078048178","display_name":"Zhongying Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Zhongying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133102532","display_name":"Jiang Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Jiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133103897","display_name":"Lijun Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Lijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133075735","display_name":"Chao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Chao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133140490","display_name":"Kai Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Kai","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133067705","display_name":"Wentao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Wentao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133076043","display_name":"Yu Qiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiao, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133087253","display_name":"Bowen Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Bowen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133067844","display_name":"Dahua Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Dahua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133114586","display_name":"Conghui He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Conghui","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":43,"corresponding_author_ids":["https://openalex.org/A5133110275"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5008999705314636,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5008999705314636,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11800000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.06629999727010727,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7287999987602234},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6657999753952026},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5034999847412109},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.48820000886917114},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.46970000863075256},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.46860000491142273},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.46790000796318054},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.44940000772476196},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.44929999113082886}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7681000232696533},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7287999987602234},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6657999753952026},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5037000179290771},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5034999847412109},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.48820000886917114},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48660001158714294},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.46970000863075256},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.46860000491142273},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.46790000796318054},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4620000123977661},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.44940000772476196},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.44929999113082886},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.42160001397132874},{"id":"https://openalex.org/C75917345","wikidata":"https://www.wikidata.org/wiki/Q2725298","display_name":"Sampling bias","level":3,"score":0.38659998774528503},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.37369999289512634},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.35580000281333923},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3465000092983246},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C33762810","wikidata":"https://www.wikidata.org/wiki/Q461671","display_name":"Data integrity","level":2,"score":0.31369999051094055},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.30489999055862427},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.29350000619888306},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.04771","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04771","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.04771","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.04771","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"document":[1],"parsing":[2],"methods":[3],"advance":[4],"primarily":[5],"through":[6,71,141],"model":[7],"architecture":[8,82],"innovation,":[9],"while":[10,78,111],"systematic":[11],"engineering":[12,73],"of":[13,36,67,83],"training":[14,49,75,103,148],"data":[15,50,72,104,159],"remains":[16],"underexplored.":[17],"Yet":[18],"state-of-the-art":[19],"models":[20,123,214],"spanning":[21],"diverse":[22],"architectures":[23],"and":[24,74,97,128,154,175,205],"parameter":[25],"scales":[26],"exhibit":[27],"highly":[28],"consistent":[29],"failure":[30],"patterns":[31],"on":[32,57,194,213],"the":[33,41,65,68,80,132,165,181,199],"same":[34],"set":[35],"hard":[37,139,151],"samples,":[38],"suggesting":[39],"that":[40],"performance":[42],"bottleneck":[43],"stems":[44],"from":[45,53,105],"shared":[46],"deficiencies":[47],"in":[48,172],"rather":[51],"than":[52],"architectural":[54,189],"differences.":[55],"Building":[56],"this":[58],"finding,":[59],"we":[60,168],"present":[61],"MinerU2.5-Pro,":[62],"which":[63],"advances":[64],"state":[66],"art":[69],"purely":[70],"strategy":[76],"design":[77],"retaining":[79],"1.2B-parameter":[81],"MinerU2.5":[84],"unchanged.":[85],"At":[86],"its":[87],"core":[88],"is":[89],"a":[90,177],"Data":[91],"Engine":[92],"co-designed":[93],"around":[94],"coverage,":[95],"informativeness,":[96],"annotation":[98,136],"accuracy:":[99],"Diversity-and-Difficulty-Aware":[100],"Sampling":[101],"expands":[102],"under":[106],"10M":[107],"to":[108,124],"65.5M":[109],"samples":[110,140],"mitigating":[112],"distribution":[113],"shift;":[114],"Cross-Model":[115],"Consistency":[116],"Verification":[117],"leverages":[118],"output":[119],"consensus":[120],"among":[121],"heterogeneous":[122],"assess":[125],"sample":[126,152],"difficulty":[127],"generate":[129],"reliable":[130],"annotations;":[131],"Judge-and-Refine":[133],"pipeline":[134],"improves":[135],"quality":[137,162],"for":[138],"render-then-verify":[142],"iterative":[143],"correction.":[144],"A":[145],"three-stage":[146],"progressive":[147],"strategy--large-scale":[149],"pre-training,":[150],"fine-tuning,":[153],"GRPO":[155],"alignment--sequentially":[156],"exploits":[157],"these":[158],"at":[160],"different":[161],"tiers.":[163],"On":[164],"evaluation":[166],"front,":[167],"rectify":[169],"element-matching":[170],"biases":[171],"OmniDocBench":[173,184,195],"v1.5":[174],"introduce":[176],"Hard":[178],"subset,":[179],"establishing":[180],"more":[182,218],"discriminative":[183],"v1.6":[185],"protocol.":[186],"Without":[187],"any":[188],"modification,":[190],"MinerU2.5-Pro":[191],"achieves":[192],"95.69":[193],"v1.6,":[196],"improving":[197],"over":[198,216],"same-architecture":[200],"baseline":[201],"by":[202],"2.71":[203],"points":[204],"surpassing":[206],"all":[207],"existing":[208],"methods,":[209],"including":[210],"those":[211],"based":[212],"with":[215],"200x":[217],"parameters.":[219]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-04-08T00:00:00"}
