{"id":"https://openalex.org/W4210692322","doi":"https://doi.org/10.1109/tmm.2022.3146779","title":"A Two-Level Rectification Attention Network for Scene Text Recognition","display_name":"A Two-Level Rectification Attention Network for Scene Text Recognition","publication_year":2022,"publication_date":"2022-01-27","ids":{"openalex":"https://openalex.org/W4210692322","doi":"https://doi.org/10.1109/tmm.2022.3146779"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2022.3146779","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3146779","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008891315","display_name":"Lintai Wu","orcid":"https://orcid.org/0000-0002-9260-0980"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]},{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210144487","display_name":"Cloud Computing Center","ror":"https://ror.org/04aa0zm65","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210144487"]}],"countries":["CN","HK"],"is_corresponding":true,"raw_author_name":"Lintai Wu","raw_affiliation_strings":["Bio-Computing Research Center, Harbin Institute of Technology, Shenzhen, Guangdong, China","Shenzhen Key Laboratory of Visual Object Detection and Recognition, Shenzhen, Guangdong, China","Department of Computer Science, City University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Bio-Computing Research Center, Harbin Institute of Technology, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210144487","https://openalex.org/I204983213"]},{"raw_affiliation_string":"Shenzhen Key Laboratory of Visual Object Detection and Recognition, Shenzhen, Guangdong, China","institution_ids":[]},{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100359961","display_name":"Yong Xu","orcid":"https://orcid.org/0000-0003-0530-2123"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I4210144487","display_name":"Cloud Computing Center","ror":"https://ror.org/04aa0zm65","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210144487"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Xu","raw_affiliation_strings":["Bio-Computing Research Center, Harbin Institute of Technology, Shenzhen, Guangdong, China","Shenzhen Key Laboratory of Visual Object Detection and Recognition, Shenzhen, Guangdong, China","Peng Cheng Laboratory, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Bio-Computing Research Center, Harbin Institute of Technology, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210144487","https://openalex.org/I204983213"]},{"raw_affiliation_string":"Shenzhen Key Laboratory of Visual Object Detection and Recognition, Shenzhen, Guangdong, China","institution_ids":[]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031957432","display_name":"Junhui Hou","orcid":"https://orcid.org/0000-0003-3431-2021"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]},{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Junhui Hou","raw_affiliation_strings":["Department of Computer Science, City University of Hong Kong, Hong Kong","Shenzhen Research Institute, City University of Hong Kong, Hong Kong"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708"]},{"raw_affiliation_string":"Shenzhen Research Institute, City University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I168719708","https://openalex.org/I4210105229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100643265","display_name":"C. L. Philip Chen","orcid":"https://orcid.org/0000-0001-5451-7230"},"institutions":[{"id":"https://openalex.org/I90610280","display_name":"South China University of Technology","ror":"https://ror.org/0530pts50","country_code":"CN","type":"education","lineage":["https://openalex.org/I90610280"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"C. L. Philip Chen","raw_affiliation_strings":["School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","Pazhou Lab, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, South China University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I90610280"]},{"raw_affiliation_string":"Pazhou Lab, Guangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100714202","display_name":"Cheng\u2010Lin Liu","orcid":"https://orcid.org/0000-0002-6743-4175"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng-Lin Liu","raw_affiliation_strings":["NLPR, Institute of Automation, Chinese Academy of Sciences, Beijing, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"NLPR, Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5008891315"],"corresponding_institution_ids":["https://openalex.org/I168719708","https://openalex.org/I204983213","https://openalex.org/I4210144487"],"apc_list":null,"apc_paid":null,"fwci":2.8539,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.91939237,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"25","issue":null,"first_page":"2404","last_page":"2414"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8279520273208618},{"id":"https://openalex.org/keywords/rectification","display_name":"Rectification","score":0.5449605584144592},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49378642439842224},{"id":"https://openalex.org/keywords/attention-network","display_name":"Attention network","score":0.4648415446281433},{"id":"https://openalex.org/keywords/text-recognition","display_name":"Text recognition","score":0.44835221767425537},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4430301785469055},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4241005480289459},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41520610451698303},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.32302525639533997},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.13302987813949585}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8279520273208618},{"id":"https://openalex.org/C50942859","wikidata":"https://www.wikidata.org/wiki/Q4967193","display_name":"Rectification","level":3,"score":0.5449605584144592},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49378642439842224},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.4648415446281433},{"id":"https://openalex.org/C2983812711","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text recognition","level":3,"score":0.44835221767425537},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4430301785469055},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4241005480289459},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41520610451698303},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32302525639533997},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.13302987813949585},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2022.3146779","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2022.3146779","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1898488731","display_name":null,"funder_award_id":"61876051","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":87,"referenced_works":["https://openalex.org/W6908809","https://openalex.org/W70975097","https://openalex.org/W603908379","https://openalex.org/W1488125194","https://openalex.org/W1491389626","https://openalex.org/W1557952530","https://openalex.org/W1922126009","https://openalex.org/W1971822075","https://openalex.org/W1972065312","https://openalex.org/W1978729128","https://openalex.org/W1981283549","https://openalex.org/W1988461287","https://openalex.org/W1998042868","https://openalex.org/W2008806374","https://openalex.org/W2124404372","https://openalex.org/W2127141656","https://openalex.org/W2128409098","https://openalex.org/W2140132917","https://openalex.org/W2142159465","https://openalex.org/W2144554289","https://openalex.org/W2146835493","https://openalex.org/W2153182373","https://openalex.org/W2157331557","https://openalex.org/W2194187530","https://openalex.org/W2194775991","https://openalex.org/W2294053032","https://openalex.org/W2343052201","https://openalex.org/W2593572697","https://openalex.org/W2618530766","https://openalex.org/W2750938222","https://openalex.org/W2752782242","https://openalex.org/W2788069964","https://openalex.org/W2788840914","https://openalex.org/W2795619303","https://openalex.org/W2810983211","https://openalex.org/W2894730515","https://openalex.org/W2904892887","https://openalex.org/W2906333459","https://openalex.org/W2911295582","https://openalex.org/W2922509574","https://openalex.org/W2935940070","https://openalex.org/W2949708697","https://openalex.org/W2962790387","https://openalex.org/W2963233387","https://openalex.org/W2963327605","https://openalex.org/W2963517393","https://openalex.org/W2963526661","https://openalex.org/W2963648432","https://openalex.org/W2963712589","https://openalex.org/W2963848795","https://openalex.org/W2964018263","https://openalex.org/W2964300754","https://openalex.org/W2965066169","https://openalex.org/W2979371747","https://openalex.org/W2980487166","https://openalex.org/W2986036729","https://openalex.org/W2997864923","https://openalex.org/W2998382406","https://openalex.org/W3002645628","https://openalex.org/W3021342796","https://openalex.org/W3021481629","https://openalex.org/W3033835243","https://openalex.org/W3034447740","https://openalex.org/W3035106683","https://openalex.org/W3035449864","https://openalex.org/W3082397598","https://openalex.org/W3088146056","https://openalex.org/W3106271744","https://openalex.org/W3110267192","https://openalex.org/W3119350346","https://openalex.org/W3154027117","https://openalex.org/W3162424363","https://openalex.org/W3163173475","https://openalex.org/W3168963360","https://openalex.org/W3204479434","https://openalex.org/W3209806402","https://openalex.org/W4294629828","https://openalex.org/W6600284362","https://openalex.org/W6618372016","https://openalex.org/W6629590909","https://openalex.org/W6633516429","https://openalex.org/W6638296183","https://openalex.org/W6642972425","https://openalex.org/W6649973027","https://openalex.org/W6741742085","https://openalex.org/W6755730432","https://openalex.org/W6776488277"],"related_works":["https://openalex.org/W2355499516","https://openalex.org/W3174480258","https://openalex.org/W3210974833","https://openalex.org/W2599000612","https://openalex.org/W2811475781","https://openalex.org/W2094246381","https://openalex.org/W2361137193","https://openalex.org/W2377736761","https://openalex.org/W2353928006","https://openalex.org/W2797910360"],"abstract_inverted_index":{"Scene":[0],"text":[1,17,30,139],"recognition":[2,33,102],"is":[3,58,66,136],"a":[4,77,94,118,155,179,208],"challenging":[5],"task":[6],"in":[7,140],"the":[8,14,20,23,50,64,70,106,112,125,141,146,164],"computer":[9],"vision":[10],"field":[11],"due":[12],"to":[13,38,68,83,137,166,183],"diversity":[15],"of":[16,22,44,91,150,170],"styles":[18],"and":[19,32,63,85,99,115,158,173,197],"complexity":[21],"image":[24],"backgrounds.":[25],"In":[26,72],"recent":[27],"decades,":[28],"numerous":[29],"rectification":[31,79,96],"methods":[34,46],"have":[35],"been":[36],"proposed":[37],"solve":[39],"these":[40,45],"problems.":[41],"However,":[42],"most":[43],"rectify":[47,84],"texts":[48,110],"at":[49,111],"geometry":[51,113],"level":[52,114],"or":[53],"pixel":[54],"level.":[55],"The":[56,133,200],"former":[57],"limited":[59],"by":[60],"geometric":[61,126],"constraints,":[62],"latter":[65],"prone":[67],"blurring":[69],"text.":[71],"this":[73],"paper,":[74],"we":[75,153,177],"propose":[76,178],"two-level":[78,95],"attention":[80,160],"network":[81,89,97,103,165],"(TRAN)":[82],"recognize":[86,138],"texts.":[87,132],"This":[88],"consists":[90],"two":[92],"parts:":[93],"(TORN)":[98],"an":[100],"attention-based":[101],"(ABRN).":[104],"Specifically,":[105],"TORN":[107],"first":[108],"rectifies":[109],"then":[116],"performs":[117],"pixel-level":[119],"adjustment,":[120],"which":[121,162],"not":[122],"only":[123],"eliminates":[124],"constraints":[127],"but":[128],"also":[129],"renders":[130],"clear":[131],"ABRN\u2019s":[134],"role":[135],"rectified":[142],"images.":[143],"To":[144],"improve":[145],"feature":[147],"extraction":[148],"ability":[149],"our":[151,185,205],"model,":[152],"design":[154],"new":[156],"channel-wise":[157],"kernel-wise":[159],"unit,":[161],"enables":[163],"handle":[167],"significant":[168],"variations":[169],"character":[171],"size":[172],"channel":[174],"interdependencies.":[175],"Furthermore,":[176],"skip":[180],"training":[181],"strategy":[182],"make":[184],"model":[186],"converge":[187],"smoothly.":[188],"We":[189],"conduct":[190],"experiments":[191],"on":[192],"various":[193],"benchmarks,":[194],"including":[195],"regular":[196],"irregular":[198],"datasets.":[199],"experimental":[201],"results":[202],"show":[203],"that":[204],"method":[206],"achieves":[207],"state-of-the-art":[209],"performance.":[210]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-31T07:56:22.981413","created_date":"2025-10-10T00:00:00"}
