{"id":"https://openalex.org/W4292825881","doi":"https://doi.org/10.1109/tip.2022.3197981","title":"PETR: Rethinking the Capability of Transformer-Based Language Model in Scene Text Recognition","display_name":"PETR: Rethinking the Capability of Transformer-Based Language Model in Scene Text Recognition","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4292825881","doi":"https://doi.org/10.1109/tip.2022.3197981","pmid":"https://pubmed.ncbi.nlm.nih.gov/35998166"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2022.3197981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2022.3197981","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100427468","display_name":"Yuxin Wang","orcid":"https://orcid.org/0000-0002-0228-6220"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxin Wang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078162380","display_name":"Hongtao Xie","orcid":"https://orcid.org/0000-0002-6249-5315"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongtao Xie","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008604905","display_name":"Shancheng Fang","orcid":"https://orcid.org/0000-0002-3100-3664"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shancheng Fang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054620019","display_name":"Mengting Xing","orcid":"https://orcid.org/0000-0002-7285-6671"},"institutions":[{"id":"https://openalex.org/I98301712","display_name":"Baidu (China)","ror":"https://ror.org/03vs3wt56","country_code":"CN","type":"company","lineage":["https://openalex.org/I98301712"]},{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengting Xing","raw_affiliation_strings":["Baidu Intelligent Cloud, Chengdu, China","School of Information Science and Technology, University of Science and Technology of China, China"],"affiliations":[{"raw_affiliation_string":"Baidu Intelligent Cloud, Chengdu, China","institution_ids":["https://openalex.org/I98301712"]},{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100378720","display_name":"Jing Wang","orcid":"https://orcid.org/0000-0003-4567-3869"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Wang","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003746644","display_name":"Shenggao Zhu","orcid":"https://orcid.org/0000-0002-3254-0058"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shenggao Zhu","raw_affiliation_strings":["Huawei Cloud, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Huawei Cloud, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100427468"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":3.0198,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.92761699,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"31","issue":null,"first_page":"5585","last_page":"5598"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.765275239944458},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7469174861907959},{"id":"https://openalex.org/keywords/rectification","display_name":"Rectification","score":0.6191225647926331},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6084736585617065},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5701366662979126},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43227076530456543},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.42041879892349243},{"id":"https://openalex.org/keywords/character-recognition","display_name":"Character recognition","score":0.41582614183425903},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.374514102935791},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35629159212112427},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.20006105303764343},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.18353331089019775},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11130431294441223}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.765275239944458},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7469174861907959},{"id":"https://openalex.org/C50942859","wikidata":"https://www.wikidata.org/wiki/Q4967193","display_name":"Rectification","level":3,"score":0.6191225647926331},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6084736585617065},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5701366662979126},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43227076530456543},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42041879892349243},{"id":"https://openalex.org/C2987247673","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Character recognition","level":3,"score":0.41582614183425903},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.374514102935791},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35629159212112427},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.20006105303764343},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.18353331089019775},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11130431294441223},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2022.3197981","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2022.3197981","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:35998166","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35998166","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5}],"awards":[{"id":"https://openalex.org/G1484805094","display_name":null,"funder_award_id":"WK3480000011","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G1615224517","display_name":null,"funder_award_id":"2018YFB0804203","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3316061764","display_name":null,"funder_award_id":"62022076","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3595397656","display_name":null,"funder_award_id":"Y2021122","funder_id":"https://openalex.org/F4320321133","funder_display_name":"Chinese Academy of Sciences"},{"id":"https://openalex.org/G3828173677","display_name":null,"funder_award_id":"62121002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6179918720","display_name":null,"funder_award_id":"U1936210","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W70975097","https://openalex.org/W1591801644","https://openalex.org/W1720745709","https://openalex.org/W1971822075","https://openalex.org/W1978729128","https://openalex.org/W1981283549","https://openalex.org/W1988461287","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2013360608","https://openalex.org/W2061802763","https://openalex.org/W2127141656","https://openalex.org/W2144554289","https://openalex.org/W2146835493","https://openalex.org/W2194187530","https://openalex.org/W2294053032","https://openalex.org/W2340583188","https://openalex.org/W2341629100","https://openalex.org/W2343052201","https://openalex.org/W2530921800","https://openalex.org/W2606394513","https://openalex.org/W2750938222","https://openalex.org/W2751748110","https://openalex.org/W2795619303","https://openalex.org/W2810983211","https://openalex.org/W2896034938","https://openalex.org/W2896457183","https://openalex.org/W2912435603","https://openalex.org/W2952285877","https://openalex.org/W2961018736","https://openalex.org/W2963233387","https://openalex.org/W2963353821","https://openalex.org/W2963712589","https://openalex.org/W2965066169","https://openalex.org/W2970910956","https://openalex.org/W2979371747","https://openalex.org/W2984470164","https://openalex.org/W2997749585","https://openalex.org/W2998382406","https://openalex.org/W3003642782","https://openalex.org/W3003711889","https://openalex.org/W3005436539","https://openalex.org/W3034447740","https://openalex.org/W3035106683","https://openalex.org/W3035449864","https://openalex.org/W3092894544","https://openalex.org/W3095672411","https://openalex.org/W3096609285","https://openalex.org/W3106271744","https://openalex.org/W3110267192","https://openalex.org/W3113987534","https://openalex.org/W3122930709","https://openalex.org/W3134064484","https://openalex.org/W3170697543","https://openalex.org/W3172752666","https://openalex.org/W3181186176","https://openalex.org/W3202415716","https://openalex.org/W6629590909","https://openalex.org/W6635446068","https://openalex.org/W6637484576","https://openalex.org/W6739901393","https://openalex.org/W6744179516","https://openalex.org/W6755207826","https://openalex.org/W6764306230"],"related_works":["https://openalex.org/W3049463507","https://openalex.org/W2936497627","https://openalex.org/W4288365749","https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W4287598411","https://openalex.org/W3098382480","https://openalex.org/W3094871513","https://openalex.org/W3198458223","https://openalex.org/W4288267738"],"abstract_inverted_index":{"The":[0,58,72,279],"exploration":[1],"of":[2,8,69,105,135,232,247,252],"linguistic":[3,125],"information":[4,126],"promotes":[5],"the":[6,15,38,49,66,103,124,128,133,142,147,159,175,190,230,245,292],"development":[7],"scene":[9],"text":[10],"recognition":[11,39,134],"task.":[12],"Benefiting":[13],"from":[14,37,236],"significance":[16],"in":[17,88,127,141,201],"parallel":[18,258],"reasoning":[19],"and":[20,79,154,226,250,264,269,285],"global":[21],"relationship":[22,173],"capture,":[23],"transformer-based":[24,106,233,259],"language":[25,85,107,194,203,234,253],"model":[26,108,149,235],"(TLM)":[27],"has":[28],"achieved":[29],"dominant":[30],"performance":[31],"recently.":[32],"As":[33],"a":[34,83,95,115,180,206,237],"decoupled":[35],"structure":[36],"process,":[40],"we":[41,93],"argue":[42],"that":[43,241,289],"TLM's":[44],"capability":[45,104,231],"is":[46,120,168,186,211],"limited":[47],"by":[48,109,170],"input":[50],"low-quality":[51],"visual":[52,59,77,129,162,177],"prediction.":[53],"To":[54],"be":[55],"specific:":[56],"1)":[57],"prediction":[60,78,157,163],"with":[61,138,164,257],"low":[62],"character-wise":[63,166],"accuracy":[64,167],"increases":[65],"correction":[67,248],"burden":[68,249],"TLM.":[70,89],"2)":[71],"inconsistent":[73],"word":[74,191],"length":[75,192],"between":[76,174],"original":[80],"image":[81],"provides":[82],"wrong":[84],"modeling":[86,204,254],"guidance":[87,195],"In":[90],"this":[91],"paper,":[92],"propose":[94],"Progressive":[96],"scEne":[97],"Text":[98],"Recognizer":[99],"(PETR)":[100],"to":[101,122,150,188,213],"improve":[102],"handling":[110],"above":[111],"two":[112],"problems.":[113],"Firstly,":[114],"Destruction":[116],"Learning":[117],"Module":[118,184],"(DLM)":[119],"proposed":[121,187],"consider":[123],"context.":[130],"DLM":[131,225],"introduces":[132],"destructed":[136,160],"images":[137],"disordered":[139],"patches":[140],"training":[143],"stage.":[144],"Through":[145,197],"guiding":[146],"vision":[148],"restore":[151],"patch":[152],"orders":[153],"make":[155],"word-level":[156],"on":[158,244,267,282],"images,":[161],"high":[165],"obtained":[169],"exploring":[171],"inner":[172],"local":[176],"patches.":[178],"Secondly,":[179],"new":[181],"Language":[182],"Rectification":[183],"(LRM)":[185],"optimize":[189],"for":[193],"rectification.":[196],"progressively":[198],"implementing":[199],"LRM":[200],"different":[202],"steps,":[205],"novel":[207],"progressive":[208],"rectification":[209,251],"network":[210],"constructed":[212],"handle":[214],"some":[215],"extremely":[216],"challenging":[217],"cases":[218],"(e.g.":[219],"distortion,":[220],"occlusion,":[221],"etc.).":[222],"By":[223],"utilizing":[224],"LRM,":[227],"PETR":[228,261,290],"enhances":[229],"more":[238],"general":[239],"aspect,":[240],"is,":[242],"focusing":[243],"reduction":[246],"guidance.":[255],"Compared":[256],"methods,":[260],"obtains":[262],"1.0%":[263],"0.8%":[265],"improvement":[266],"regular":[268],"irregular":[270],"datasets":[271],"respectively":[272],"while":[273],"introducing":[274],"only":[275],"1.7M":[276],"additional":[277],"parameters.":[278],"extensive":[280],"experiments":[281],"both":[283],"English":[284],"Chinese":[286],"benchmarks":[287],"demonstrate":[288],"achieves":[291],"state-of-the-art":[293],"results.":[294]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":11}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
