{"id":"https://openalex.org/W3207713347","doi":"https://doi.org/10.1145/3474085.3475388","title":"DocTr: Document Image Transformer for Geometric Unwarping and Illumination Correction","display_name":"DocTr: Document Image Transformer for Geometric Unwarping and Illumination Correction","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3207713347","doi":"https://doi.org/10.1145/3474085.3475388","mag":"3207713347"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475388","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068201443","display_name":"Hao Feng","orcid":"https://orcid.org/0000-0001-8127-6639"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Feng","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102902110","display_name":"Yuechen Wang","orcid":"https://orcid.org/0000-0001-6098-6164"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuechen Wang","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046805800","display_name":"Wengang Zhou","orcid":"https://orcid.org/0000-0003-1690-9836"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wengang Zhou","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089561793","display_name":"Jiajun Deng","orcid":"https://orcid.org/0000-0001-9624-7451"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiajun Deng","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078141810","display_name":"Houqiang Li","orcid":"https://orcid.org/0000-0003-2188-3028"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Houqiang Li","raw_affiliation_strings":[""],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.3661,"has_fulltext":false,"cited_by_count":64,"citation_normalized_percentile":{"value":0.9566136,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"273","last_page":"281"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6819394826889038},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6034739017486572},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5732181668281555},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5472358465194702},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.4739917516708374},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.45656877756118774},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.17220202088356018},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11112743616104126}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6819394826889038},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6034739017486572},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5732181668281555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5472358465194702},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.4739917516708374},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.45656877756118774},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.17220202088356018},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11112743616104126},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475388","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475388","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8299740667","display_name":null,"funder_award_id":"61836011 and 61632019","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W203808714","https://openalex.org/W1647671624","https://openalex.org/W1686810756","https://openalex.org/W1901129140","https://openalex.org/W2027238116","https://openalex.org/W2044607679","https://openalex.org/W2047643928","https://openalex.org/W2090518410","https://openalex.org/W2115263911","https://openalex.org/W2121498135","https://openalex.org/W2132083787","https://openalex.org/W2133665775","https://openalex.org/W2150504994","https://openalex.org/W2161175365","https://openalex.org/W2170265032","https://openalex.org/W2412396436","https://openalex.org/W2750779823","https://openalex.org/W2798905980","https://openalex.org/W2959024664","https://openalex.org/W2981771415","https://openalex.org/W2985998306","https://openalex.org/W3049081235","https://openalex.org/W3094927345","https://openalex.org/W3096609285","https://openalex.org/W3167536469","https://openalex.org/W3168649818","https://openalex.org/W6600050674"],"related_works":["https://openalex.org/W1891287906","https://openalex.org/W2036807459","https://openalex.org/W2775347418","https://openalex.org/W1969923398","https://openalex.org/W2772917594","https://openalex.org/W2166024367","https://openalex.org/W2755342338","https://openalex.org/W2119567889","https://openalex.org/W3116076068","https://openalex.org/W2229312674"],"abstract_inverted_index":{"In":[0],"this":[1],"work,":[2],"we":[3],"propose":[4],"a":[5,30,41,118],"new":[6],"framework,":[7],"called":[8],"Document":[9],"Image":[10],"Transformer":[11],"(DocTr),":[12],"to":[13,68,85],"address":[14],"the":[15,23,47,52,56,64,70,82,87,106,123],"issue":[16],"of":[17,22,29,43,55],"geometry":[18],"and":[19,34,62,90,100,135],"illumination":[20,36,77],"distortion":[21],"document":[24,57],"images.":[25],"Specifically,":[26],"DocTr":[27,111],"consists":[28],"geometric":[31,48,71,74],"unwarping":[32,49],"transformer":[33,50,79],"an":[35],"correction":[37,78],"transformer.":[38],"By":[39],"setting":[40],"set":[42],"learned":[44],"query":[45],"embedding,":[46],"captures":[51],"global":[53],"context":[54],"image":[58],"by":[59],"self-attention":[60],"mechanism":[61],"decodes":[63],"pixel-wise":[65],"displacement":[66],"solution":[67],"correct":[69],"distortion.":[72],"After":[73],"unwarping,":[75],"our":[76,110],"further":[80],"removes":[81],"shading":[83],"artifacts":[84],"improve":[86],"visual":[88],"quality":[89],"OCR":[91],"accuracy.":[92],"Extensive":[93],"evaluations":[94],"are":[95,103],"conducted":[96],"on":[97,132],"several":[98],"datasets,":[99],"superior":[101],"results":[102],"reported":[104],"against":[105],"state-of-the-art":[107,124],"methods.":[108,125],"Remarkably,":[109],"achieves":[112],"$20.02%$":[113],"Character":[114],"Error":[115],"Rate":[116],"(CER),":[117],"$15%$":[119],"absolute":[120],"improvement":[121],"over":[122],"Moreover,":[126],"it":[127],"also":[128],"shows":[129],"high":[130],"efficiency":[131],"running":[133],"time":[134],"parameter":[136],"count.":[137]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":16},{"year":2022,"cited_by_count":12}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
