{"id":"https://openalex.org/W7135036919","doi":"https://doi.org/10.48550/arxiv.2603.10910","title":"GLM-OCR Technical Report","display_name":"GLM-OCR Technical Report","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135036919","doi":"https://doi.org/10.48550/arxiv.2603.10910"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10910","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10910","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10910","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128855462","display_name":"Shuaiqi Duan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Duan, Shuaiqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128847121","display_name":"Yadong Xue","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xue, Yadong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128820522","display_name":"Weihan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Weihan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128891885","display_name":"Zhe Su","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Su, Zhe","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128824512","display_name":"Huan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Huan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128910655","display_name":"Sheng Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Sheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047650196","display_name":"Guobing Gan","orcid":"https://orcid.org/0000-0003-0614-4200"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gan, Guobing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128867796","display_name":"Guo Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Guo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128855125","display_name":"Zihan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zihan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Yan, Shengdong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Shengdong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128915881","display_name":"Dexin Jin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Dexin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128836859","display_name":"Yuxuan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuxuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128880373","display_name":"Guohong Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wen, Guohong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128841886","display_name":"Yanfeng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yanfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128898616","display_name":"Yutao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yutao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128852123","display_name":"Xiaohan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaohan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128801411","display_name":"Wenyi Hong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hong, Wenyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035985651","display_name":"Yukuo Cen","orcid":"https://orcid.org/0000-0001-5682-2810"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cen, Yukuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110195053","display_name":"Da Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Da","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128911283","display_name":"Bin Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Bin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112899268","display_name":"Wenmeng Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Wenmeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128803686","display_name":"Xiaotao Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Xiaotao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128856935","display_name":"Jie Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Jie","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":23,"corresponding_author_ids":["https://openalex.org/A5128855462"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.968999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.968999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.008999999612569809,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.0026000000070780516,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6543999910354614},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5889999866485596},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.58160001039505},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5302000045776367},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5184999704360962},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.5005999803543091},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.4717000126838684},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4406999945640564},{"id":"https://openalex.org/keywords/standardization","display_name":"Standardization","score":0.3846000134944916}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7050999999046326},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6543999910354614},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5889999866485596},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.58160001039505},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5302000045776367},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5184999704360962},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.5005999803543091},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.4717000126838684},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4406999945640564},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3849000036716461},{"id":"https://openalex.org/C188087704","wikidata":"https://www.wikidata.org/wiki/Q369577","display_name":"Standardization","level":2,"score":0.3846000134944916},{"id":"https://openalex.org/C182449105","wikidata":"https://www.wikidata.org/wiki/Q3099732","display_name":"Technical report","level":2,"score":0.3824000060558319},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.3653999865055084},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34619998931884766},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.3382999897003174},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3197000026702881},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3133000135421753},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C554936623","wikidata":"https://www.wikidata.org/wiki/Q199657","display_name":"Reading (process)","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C2779547435","wikidata":"https://www.wikidata.org/wiki/Q1121492","display_name":"Directive","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.2635999917984009},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10910","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10910","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10910","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10910","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"GLM-OCR":[0,48,102],"is":[1,80],"an":[2],"efficient":[3],"0.9B-parameter":[4],"compact":[5,123],"multimodal":[6],"model":[7],"designed":[8],"for":[9,131],"real-world":[10],"document":[11,109],"understanding.":[12],"It":[13],"combines":[14],"a":[15,21,27,50,77],"0.4B-parameter":[16],"CogViT":[17],"visual":[18],"encoder":[19],"with":[20],"0.5B-parameter":[22],"GLM":[23],"language":[24],"decoder,":[25],"achieving":[26],"strong":[28],"balance":[29],"between":[30],"computational":[31],"efficiency":[32],"and":[33,97,112,118,125,136],"recognition":[34],"performance.":[35],"To":[36],"address":[37],"the":[38,74],"inefficiency":[39],"of":[40],"standard":[41],"autoregressive":[42],"decoding":[43,63],"in":[44,108],"deterministic":[45],"OCR":[46],"tasks,":[47],"introduces":[49],"Multi-Token":[51],"Prediction":[52],"(MTP)":[53],"mechanism":[54],"that":[55,101],"predicts":[56],"multiple":[57],"tokens":[58],"per":[59],"step,":[60],"significantly":[61],"improving":[62],"throughput":[64],"while":[65],"keeping":[66],"memory":[67],"overhead":[68],"low":[69],"through":[70],"shared":[71],"parameters.":[72],"At":[73],"system":[75],"level,":[76],"two-stage":[78],"pipeline":[79],"adopted:":[81],"PP-DocLayout-V3":[82],"first":[83],"performs":[84],"layout":[85],"analysis,":[86],"followed":[87],"by":[88],"parallel":[89],"region-level":[90],"recognition.":[91],"Extensive":[92],"evaluations":[93],"on":[94],"public":[95],"benchmarks":[96],"industrial":[98],"scenarios":[99],"show":[100],"achieves":[103],"competitive":[104],"or":[105],"state-of-the-art":[106],"performance":[107],"parsing,":[110],"text":[111],"formula":[113],"transcription,":[114],"table":[115],"structure":[116],"recovery,":[117],"key":[119],"information":[120],"extraction.":[121],"Its":[122],"architecture":[124],"structured":[126],"generation":[127],"make":[128],"it":[129],"suitable":[130],"both":[132],"resource-constrained":[133],"edge":[134],"deployment":[135],"large-scale":[137],"production":[138],"systems.":[139]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2026-03-13T00:00:00"}
